diff --git a/.gitignore b/.gitignore index b8bd0267..6f773328 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +# Build dir +/build* + +# Kdev files +*.kdev4 + # Compiled Object files *.slo *.lo diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f38c24fd..00000000 --- a/.travis.yml +++ /dev/null @@ -1,72 +0,0 @@ -language: cpp - -matrix: - include: -# - os: linux -# env: COMPILER=icc CMAKE_BUILD_TYPE=Debug - - os: linux - env: COMPILER=gcc CMAKE_BUILD_TYPE=Debug - - os: linux - env: COMPILER=clang CMAKE_BUILD_TYPE=Debug - - os: osx - env: COMPILER=clang CMAKE_BUILD_TYPE=Debug -# - os: linux -# env: COMPILER=icc CMAKE_BUILD_TYPE=Release - - os: linux - env: COMPILER=gcc CMAKE_BUILD_TYPE=Release - - os: linux - env: COMPILER=clang CMAKE_BUILD_TYPE=Release - - os: osx - env: COMPILER=clang CMAKE_BUILD_TYPE=Release - -fast_finish: true - -install: - - | - if [ "${TRAVIS_OS_NAME}" == 'linux' ]; then - sudo apt-get --no-install-recommends install texinfo texi2html texlive-base texlive-generic-recommended texlive-fonts-recommended doxygen graphviz texlive-latex-base texlive-latex-recommended texlive-latex-extra python-git - else - brew install texinfo - brew ln texinfo --force - brew upgrade cmake - fi - - | - if [ $COMPILER == icc ]; then - wget "https://raw.githubusercontent.com/nemequ/icc-travis/master/install-icc.sh" - sudo sh install-icc.sh - source ~/.bashrc - export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc" - fi - - | - if [ $COMPILER == clang ]; then - export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang" - fi - -script: - - | - srcdir='..' - mkdir build - cd build - cmake $CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_DOC=ON -DBUILD_TESTING=ON -DDISABLE_REFMAN_PDF=ON .. 
- - - make lib - - - export CTEST_OUTPUT_ON_FAILURE=1 - - | - if [ $COMPILER != icc ]; then - make check-testsuite # Build and check the testsuite - make check-examples # Build and check the examples - # TODO! (fails with error: no such file or directory: 'arrdaxpyf.o') - # - make check-benchmarks # Build and check the benchmarks (takes a long time) - make blitz-doc - sudo make install # Install Blitz++ - fi - - - | - if [ "${TRAVIS_OS_NAME}" == 'linux' ]; then - LD_LIBRARY_PATH=/usr/local/lib ${srcdir}/travis-ci/check-wiki-examples.py - fi - - -after_script: - - if [ $COMPILER == icc ]; then '[[ ! -z "${INTEL_INSTALL_PATH}" ]] && uninstall_intel_software'; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index d4ac2629..bbd555aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,4 @@ -cmake_minimum_required(VERSION 3.12) # FindPython -cmake_policy(VERSION 3.1) - -set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") +cmake_minimum_required(VERSION 3.16) # FindPython # Blitz version and release date. 
@@ -11,76 +8,96 @@ set(blitz_PATCH 3) set(blitz_VERSION ${blitz_MAJOR}.${blitz_MINOR}.${blitz_PATCH}) set(BZ_PACKAGE_STRING "${CMAKE_PROJECT_NAME} ${blitz_MAJOR}.${blitz_MINOR}") -# Set the project +# Set the project -project(blitz VERSION ${blitz_VERSION} LANGUAGES CXX) +project(blitz VERSION ${blitz_VERSION} + LANGUAGES CXX + DESCRIPTION "A multi-array library -- fork by András Vukics" + HOMEPAGE_URL "https://github.com/vukics/blitz") -option(BUILD_DOC "Build documentation" OFF) -option(BUILD_TESTING "Build tests, examples and benchmarks" OFF) +# Ubuntu 20.04LTS versions for compilers -# Packaging +set(GCC_MINIMAL 9.3) +set(CLANG_MINIMAL 10.0) -set(CPACK_GENERATOR "TGZ") -set(CPACK_PACKAGE_VERSION_MAJOR ${blitz_MAJOR}) -set(CPACK_PACKAGE_VERSION_MINOR ${blitz_MINOR}) -set(CPACK_PACKAGE_VERSION_PATCH ${blitz_PATCH}) -set(CPACK_SOURCE_GENERATOR "TBZ2") -set(CPACK_SOURCE_PACKAGE_FILE_NAME blitz-${blitz_VERSION}) -set(CPACK_SOURCE_IGNORE_FILES ".*.swp$" ".*.swo$" "~$" ".bz2$" ".gz$") +if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS ${GCC_MINIMAL}) + message(FATAL_ERROR "GCC g++ version >= ${GCC_MINIMAL} needed.") +endif () -include(CPack) -include(UnitTests) -include(BlitzConfigFileName) -include(Win32Compat) -include(CreatePkgConfig) -include(GNUInstallDirs) -include(CMakePackageConfigHelpers) +if (${CMAKE_CXX_COMPILER_ID} STREQUAL Clang AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS ${CLANG_MINIMAL}) + message(FATAL_ERROR "Clang version >= ${CLANG_MINIMAL} needed.") +endif () -include_directories(BEFORE SYSTEM ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") -add_custom_target(generated-headers ALL) +# Packaging -set(PKGCONFIG_LIBS -lblitz) +#set(CPACK_GENERATOR "TGZ") +#set(CPACK_PACKAGE_VERSION_MAJOR ${blitz_MAJOR}) +#set(CPACK_PACKAGE_VERSION_MINOR ${blitz_MINOR}) +#set(CPACK_PACKAGE_VERSION_PATCH ${blitz_PATCH}) +#set(CPACK_SOURCE_GENERATOR "TBZ2") 
+#set(CPACK_SOURCE_PACKAGE_FILE_NAME blitz-${blitz_VERSION}) +#set(CPACK_SOURCE_IGNORE_FILES ".*.swp$" ".*.swo$" "~$" ".bz2$" ".gz$") + +#include(CPack) +#include(UnitTests) +#include(GNUInstallDirs) +#include(CreatePkgConfig) +#include(CMakePackageConfigHelpers) add_subdirectory(blitz) -add_subdirectory(random) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") + +include(GetGitRevisionDescription) +get_git_head_revision(BLITZ_GIT_REFSPEC BLITZ_GIT_SHA1) +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/blitzGitSHA1.cc.in" "${CMAKE_CURRENT_BINARY_DIR}/blitzGitSHA1.cc" @ONLY) + +add_library(blitzGitSHA1 OBJECT "${CMAKE_CURRENT_BINARY_DIR}/blitzGitSHA1.cc") + + add_subdirectory(src) -if (BUILD_DOC) - add_subdirectory(doc) -endif() -if (BUILD_TESTING) - add_subdirectory(testsuite) - add_subdirectory(examples) - add_subdirectory(benchmarks) -endif() +add_library(Blitz SHARED $ $) +add_library(Blitz::Blitz ALIAS Blitz) -# Add a custom target to mimic autotools "make lib" +target_link_libraries(Blitz PUBLIC Blitz::BlitzHeaders) + +set_target_properties(Blitz PROPERTIES + VERSION ${blitz_VERSION} + SOVERSION 0 + CLEAN_DIRECT_OUTPUT 1) + + +if (CMAKE_PROJECT_NAME STREQUAL "blitz") # check whether the current file is top-level CMakeLists.txt + add_subdirectory(testsuite) +endif() -add_custom_target(lib DEPENDS blitz) +install(TARGETS Blitz DESTINATION lib) # Configuration files. 
-CreatePkgConfigInfo(blitz "") -CreatePkgConfigInfo(blitz-static static) +#CreatePkgConfigInfo(blitz "") +#CreatePkgConfigInfo(blitz-static static) -write_basic_package_version_file( - cmake/${PROJECT_NAME}ConfigVersion.cmake - VERSION ${PROJECT_VERSION} - COMPATIBILITY AnyNewerVersion) -configure_package_config_file( - cmake/${PROJECT_NAME}Config.cmake.in - cmake/${PROJECT_NAME}Config.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/) +#write_basic_package_version_file( + #cmake/${PROJECT_NAME}ConfigVersion.cmake + #VERSION ${PROJECT_VERSION} + #COMPATIBILITY AnyNewerVersion) +#configure_package_config_file( + #cmake/${PROJECT_NAME}Config.cmake.in + #cmake/${PROJECT_NAME}Config.cmake + #INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/) # Installation -install(EXPORT ${PROJECT_NAME}Targets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) -install(FILES - ${CMAKE_BINARY_DIR}/blitz.pc - ${CMAKE_BINARY_DIR}/blitz-static.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -install(FILES - ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/) +#install(EXPORT ${PROJECT_NAME}Targets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) +#install(FILES + #${CMAKE_BINARY_DIR}/blitz.pc + #${CMAKE_BINARY_DIR}/blitz-static.pc + #DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +#install(FILES + #${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake + #${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake + #DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/) diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 236b2104..00000000 --- a/ChangeLog +++ /dev/null @@ -1,1900 +0,0 @@ -changeset: 1917:77c9002404a7 -tag: tip -user: Julian Cummings -date: Tue Jul 03 16:30:09 2012 -0700 -files: COPYRIGHT LEGAL LICENSE README -description: -Updated legal information in README and LEGAL files. 
Updated LICENSE file to use version 2.0 of the Perl Artistic License as required for use with Fedora. Updated language in COPYRIGHT file for BSD license. - - -changeset: 1916:7c6844d1a805 -user: Julian Cummings -date: Wed Jun 27 18:36:46 2012 -0700 -files: NEWS -description: -A brief note on the new 0.10 snapshot release. - - -changeset: 1915:a8ea119bc0dc -user: Julian Cummings -date: Wed Jun 27 18:27:21 2012 -0700 -files: doc/stamp-vti doc/version.texi -description: -Update datestamp and version number for documentation. - - -changeset: 1914:fc43218c3b7e -user: Julian Cummings -date: Wed Jun 27 18:23:08 2012 -0700 -files: Makefile.am -description: -Added Blitz-VS2010.zip to the list of files included in the blitz package. - - -changeset: 1913:4014cbb0ff08 -user: Julian Cummings -date: Wed Jun 27 18:19:24 2012 -0700 -files: Blitz-VS2010.zip -description: -New version of support files for compiling blitz with Microsoft Visual Studio 2010. - - -changeset: 1912:927504bb5536 -user: Julian Cummings -date: Tue Jun 26 17:33:54 2012 -0700 -files: blitz/array/expr.h blitz/array/functorExpr.h blitz/array/stencil-et-macros.h blitz/array/where.h -description: -Patch from Sylwester Arabas for blitz compatibility with clang 3.1 compiler. Changes seem fine with g++ compiler. - - -changeset: 1911:e1a8ec015bde -user: Julian Cummings -date: Tue Jun 26 17:15:12 2012 -0700 -files: benchmarks/daxpy.cpp benchmarks/daxpy2.cpp benchmarks/loop4.cpp benchmarks/makeloops.cpp benchmarks/stencil.cpp -description: -Corrected typo in BenchmarkExt class member name that was discovered in several of the benchmark test codes. - - -changeset: 1910:8f8423701853 -user: Julian Cummings -date: Tue Jun 26 16:47:42 2012 -0700 -files: blitz/globeval.cc blitz/memblock.h blitz/tmevaluate.h blitz/tvevaluate.h -description: -Patch from Matt Bingen to eliminate compiler warnings about using pragmas within macros and to use ptrdiff_t instead of int in a few more places for 64-bit safety. 
- - -changeset: 1909:11a167db8e5f -user: Julian Cummings -date: Tue Jun 26 16:44:55 2012 -0700 -files: blitz/array-impl.h -description: -Patch from Matt Bingen to allow the default storage type for Array to be configurable. - - -changeset: 1908:a46cacb46e8f -user: Julian Cummings -date: Tue Jun 26 16:38:50 2012 -0700 -files: testsuite/initialize.cpp -description: -Workarounds for testsuite code that attempts to use list initialization on Vectors that reference data stored non-consecutively in another Vector. Such code no longer works because the current stand-in Vector class is just a wrapper for the 1D Array class. - - -changeset: 1907:ac2b17fed6c4 -user: Julian Cummings -date: Tue Jun 26 14:28:43 2012 -0700 -files: examples/Makefile.am -description: -Commented out several example programs which rely on the Vector classes and are therefore not currently functional. The Vector classes need to be updated to use the new style expression template machinery in blitz. The old style ET machinery has been removed to reduce the overall amount of blitz code. - - -changeset: 1906:f88ef337efee -user: Julian Cummings -date: Tue Jun 26 14:25:30 2012 -0700 -files: examples/where.cpp -description: -Modified example code to use 1D Array instead of Vector class, which is currently non-functional. - - -changeset: 1905:2fa73f1ae57a -user: Julian Cummings -date: Tue Jun 26 14:24:22 2012 -0700 -files: examples/tiny.cpp -description: -Updated include file list to eliminate , which is obsolete. - - -changeset: 1904:ef381c095e49 -user: Julian Cummings -date: Tue Jun 26 14:13:11 2012 -0700 -files: blitz/array-impl.h -description: -Revert ListInitializationSwitch back to using a T_numtype* rather than a T_iterator to traverse Array memory for list initialization. By definition, a comma-delimited list will initialize an Array in memory storage order, so a standard pointer is adequate as long as the memory storage is contiguous. 
This change fixes an error that was occurring in the storage testsuite code. - - -changeset: 1903:d1291bffbb19 -user: Julian Cummings -date: Tue Jun 26 14:07:16 2012 -0700 -files: testsuite/reduce.cpp -description: -Removed sum expression that was not used in any test and was triggering an error. - - -changeset: 1902:56a36918ccf1 -user: Patrik Jonsson -date: Tue Apr 24 16:04:24 2012 -0400 -files: configure.ac -description: -Removed check for boost::mpi library since we only need the include file. - - -changeset: 1901:20ebbb315dd7 -user: Patrik Jonsson -date: Tue Apr 24 11:56:15 2012 -0700 -files: blitz/array/storage.h blitz/tinyvec2.h configure.ac -description: -Added a separate check for the Boost::MPI library when serialization is enabled, since one can imagine wanting serialization without MPI. - - -changeset: 1900:2663e207d21f -user: Patrik Jonsson -date: Tue Apr 17 13:14:16 2012 -0400 -files: blitz/array-impl.h blitz/array/ops.cc testsuite/Makefile.am testsuite/update.cpp -description: -Fixed Array update operators for scalars, which the previous change broke. Added testcase update to exercise the Array update operators for scalars, Arrays, and expressions. - - -changeset: 1899:5e7778d680d6 -user: Patrik Jonsson -date: Tue Apr 17 12:49:59 2012 -0400 -files: blitz/array-impl.h blitz/array/ops.cc testsuite/multicomponent.cpp -description: -The Array update operators (like +=) are now defined exactly like the assignment operator, which was not the case before, so some expressions that worked as assignments didn't work as updates. Updated multicomponent test case to exercise this. - - -changeset: 1898:3acfc76c2df9 -user: Patrik Jonsson -date: Tue Mar 06 13:12:04 2012 -0500 -files: blitz/array/ops.h testsuite/Makefile.am testsuite/bitwise.cpp -description: -Restored left and right shift operators, which were commented out, and added scalar versions of them as well. Added a test case for the bitwise operators (currently only for TinyVector). 
There appears to be no ambiguity to me, the operators take ETBase arguments and should not conflict with the I/O operators. At least, all tests pass and my code compiles fine. - - -changeset: 1897:262deb840568 -user: Patrik Jonsson -date: Wed Jan 25 10:48:31 2012 -0500 -files: blitz/tinyvec2.h -description: -Added extent() member to TinyVector in analogy with Array and TinyMatrix. - - -changeset: 1896:d4a8862b671a -user: Patrik Jonsson -date: Mon Dec 12 17:11:18 2011 -0500 -files: blitz/tinymat2.cc -description: -One more warning fix. - - -changeset: 1895:4d8789826636 -user: Patrik Jonsson -date: Mon Dec 12 17:00:20 2011 -0500 -files: blitz/array/reduce.h blitz/array/storage.h blitz/globeval.cc blitz/indexexpr.h blitz/memblock.cc blitz/shapecheck.h -description: -Fixed some warnings with gcc -Wall. - - -changeset: 1894:90c2f00d236e -user: Patrik Jonsson -date: Tue Dec 06 18:55:54 2011 -0500 -files: blitz/array/newet-macros.h blitz/funcs.h testsuite/Makefile.am testsuite/int-math-func.cpp -description: -Changed the BZ_DEFINE_UNARY_FUNC macro so the functors now have the option of using a double return type for integer types. This avoids bug 3381318 where doing sin(intarray) returns ints even if the results go into a floating array. Turned off this option for functions like abs() that can sensibly be called with integer types. Added testcase int-math-func to test this. - - -changeset: 1893:e29277a6148b -user: Patrik Jonsson -date: Tue Dec 06 16:27:08 2011 -0500 -files: blitz/array/funcs.h blitz/funcs.h -description: -Removed the special definitions of abs() in array/funcs.h. Since all types are overloaded as abs() in current cmath, this should no longer be necessary. This also fixes bug 3427014. - - -changeset: 1892:a8f6391064b0 -user: Patrik Jonsson -date: Tue Dec 06 16:24:18 2011 -0500 -files: testsuite/Makefile.am -description: -Fixed a bug in the testsuite Makefile.am. 
- - -changeset: 1891:2b8b3ea5cd97 -user: Patrik Jonsson -date: Tue Dec 06 15:45:36 2011 -0500 -files: testsuite/Makefile.am -description: -Changed the testsuite Makefile so all tests are run even if one fails. - - -changeset: 1890:4f7d8ef18b48 -user: Patrik Jonsson -date: Mon Dec 05 15:14:49 2011 -0500 -files: blitz/blitz.h random/mt.h testsuite/serialize.cpp -description: -Added ability to serialize the Mersenne Twister state, and a test of this to the serialize test case. Also, blitz.h now includes bzconfig.h. - - -changeset: 1889:d4ea469afaa0 -user: Patrik Jonsson -date: Tue Nov 29 22:07:47 2011 -0500 -files: blitz/globeval.cc testsuite/Makefile.am testsuite/indexexpr-base.cpp -description: -Fixed a bug in the 1D index traversal evaluation that caused results for arrays with nonzero base to be written out of bounds. Fixes issue 3441913. Added indexexpr_base to the testsuite to check this. - - -changeset: 1888:9a7e4e6adaa2 -user: Patrik Jonsson -date: Wed Nov 23 09:42:33 2011 -0500 -files: blitz/array-impl.h blitz/array/storage.h blitz/tinymat2.h blitz/tinyvec2.h -description: -Moved declaration of boost::mpi::is_mpi_datatype out of the blitz namespace. - - -changeset: 1887:c3f9ffd1a3d3 -user: Patrik Jonsson -date: Tue Nov 22 15:39:45 2011 -0500 -files: blitz/array-impl.h blitz/array/storage.h blitz/memblock.cc blitz/memblock.h blitz/tinymat2.h blitz/tinyvec2.h -description: -Improved the Boost::Serialization support so that it works with the skeleton/content functionality in boost::MPI. Declared boost::mpi::is_mpi_datatype appropriate data types. - - -changeset: 1886:9527b95a58b5 -user: Patrik Jonsson -date: Thu Oct 27 14:07:17 2011 -0400 -files: blitz/memblock.h -description: -Fixed a segfault when serializing MemoryBlockReferences without a block. - - -changeset: 1885:41b58087d239 -user: Patrik Jonsson -date: Wed Oct 26 17:15:35 2011 -0400 -files: configure.ac -description: -Added --enable-serialization configure option. 
- - -changeset: 1884:d05049b205e5 -user: Patrik Jonsson -date: Wed Oct 26 16:35:20 2011 -0400 -files: benchmarks/Makefile.am examples/Makefile.am -description: -Fixed makefiles for benchmarks and examples dirs. - - -changeset: 1883:daba8c883045 -user: Patrik Jonsson -date: Wed Oct 26 16:24:41 2011 -0400 -files: blitz/array-impl.h blitz/array/storage.h blitz/memblock.h blitz/tinymat2.h blitz/tinyvec2.h lib/Makefile.am testsuite/Makefile.am testsuite/serialize.cpp -description: -Updated flags for serialization support, tweaked Makefiles. - - -changeset: 1882:68f7fb7d191e -user: Patrik Jonsson -date: Wed Oct 26 16:02:13 2011 -0400 -files: blitz/array-impl.h blitz/array/storage.h blitz/memblock.cc blitz/memblock.h blitz/tinymat2.h blitz/tinyvec2.h configure.ac m4/ax_boost_base.m4 m4/ax_boost_serialization.m4 testsuite/Makefile.am testsuite/serialize.cpp -description: -Added Boost::Serialization support for Array, TinyVector and TinyMatrix. Added --with-boost and --with-boost-serialization configure options. - - -changeset: 1881:4de0b8fd18cd -user: Patrik Jonsson -date: Thu Sep 29 15:35:03 2011 -0400 -files: blitz/tmevaluate.h blitz/tvevaluate.h configure.ac testsuite/alignment.cpp -description: -Added macro BZ_USE_ALIGNMENT_PRAGMAS, which is only set to true if a SIMD width is specified. If this is not defined, the alignment pragmas are not used. This fixes bug 3415448. - - -changeset: 1880:f360eaedb125 -user: Patrik Jonsson -date: Wed Sep 28 08:47:19 2011 -0400 -files: testsuite/64bit.cpp -description: -Updated 64bit test case to also test that fastRead for the expression classes also works with large arguments. 
- - -changeset: 1879:54b961d29fc0 -user: Patrik Jonsson -date: Tue Sep 27 16:12:21 2011 -0400 -files: blitz/array/expr.h blitz/array/fastiter.h blitz/array/functorExpr.h blitz/array/where.h blitz/generate/genstencils.py blitz/indexexpr.h blitz/levicivita.h blitz/range.h blitz/tinymat2.h blitz/tinyvec2.h blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Changed argument of the fastRead() functions from int to diffType, which is also needed for 64-bit offsets to work. - - -changeset: 1878:ed2dfe67697b -parent: 1877:da484a923d69 -parent: 1875:eb2f78ddf0b4 -user: Patrik Jonsson -date: Tue Sep 27 15:43:22 2011 -0400 -files: testsuite/Makefile.am -description: -Merge. - - -changeset: 1877:da484a923d69 -user: Patrik Jonsson -date: Tue Sep 27 15:29:55 2011 -0400 -files: blitz/globeval.cc testsuite/64bit.cpp testsuite/Makefile.am -description: -Further 64-bit fixes. Changed the 64bit test case to catch these problems, which unfortunately precludes only evaluating over a subrange. Therefore, I've enabled optimization for the 64bit test only so it doesn't take forever to run. - - -changeset: 1876:84f0b039f76d -parent: 1874:450f9b1ee2d0 -user: Patrik Jonsson -date: Tue Sep 27 14:31:34 2011 -0400 -files: blitz/globeval.cc -description: -Changed offset type in evaluation from int to diffType, because otherwise evaluations with offsets > 1<<31 fails. - - -changeset: 1875:eb2f78ddf0b4 -user: Patrik Jonsson -date: Mon Sep 26 19:11:43 2011 -0700 -files: blitz/array/expr.h testsuite/Makefile.am testsuite/cast.cpp -description: -Fixed broken cast float->double by removing a ArrayExprUnaryOp constructor that caused ambiguous resolution and apparently isn't actually used. Added test case cast. - - -changeset: 1874:450f9b1ee2d0 -parent: 1873:1e77ae68003b -parent: 1871:4223fee30010 -user: Patrik Jonsson -date: Mon Sep 26 21:07:37 2011 -0400 -description: -Merged. 
- - -changeset: 1873:1e77ae68003b -user: Patrik Jonsson -date: Mon Sep 26 21:07:13 2011 -0400 -files: testsuite/reduce.cpp -description: -Added test for bug 2058441. - - -changeset: 1872:760e2d93d633 -parent: 1862:691355238c25 -user: Patrik Jonsson -date: Tue Jul 26 17:01:08 2011 -0400 -files: blitz/array/reduce.h -description: -Added comment to reduce.h documenting bug 2058441. - - -changeset: 1871:4223fee30010 -user: Patrik Jonsson -date: Thu Aug 11 14:42:04 2011 -0700 -files: blitz/tinyvec2.h -description: -Removed spurious inline declarations. - - -changeset: 1870:3448111f885d -user: Patrik Jonsson -date: Thu Aug 11 11:40:38 2011 -0700 -files: blitz/tmevaluate.h blitz/tvevaluate.h -description: -Fixed clobbering of template parameter. - - -changeset: 1869:fa21544fdcbd -user: Patrik Jonsson -date: Thu Aug 11 09:49:15 2011 -0700 -files: blitz/globeval.cc -description: -Fixed gcc complaining about rank() not being a constant-expression. - - -changeset: 1868:a6f80db77b94 -user: patricg -date: Thu Jul 28 22:38:01 2011 +0100 -files: doc/stamp-vti doc/version.texi -description: -Updated. - - -changeset: 1867:c73aba3b07e9 -user: patricg -date: Thu Jul 28 22:23:42 2011 +0100 -files: blitz/generate/Makefile.am -description: -Added python script to EXTRA_DIST in order to pass checkdist. - - -changeset: 1866:c243451132a6 -user: patricg -date: Thu Jul 28 22:21:30 2011 +0100 -files: doc/stencils/Makefile.am -description: -Renamed the preprocessorp macro "stenciltoapply" to the new name -corresponding to the new internal stencil operator (that take -iterators as opposed to the stencil ET expression operators that -take arrays), i.e. appended '_stencilop' to the operator name. - - -changeset: 1865:06bd97b19920 -user: patricg -date: Thu Jul 28 21:21:48 2011 +0100 -files: configure.ac -description: -Added clang++ in the list of C++ compilers to check. 
- - -changeset: 1864:fa98fbc6bd4d -user: patricg -date: Thu Jul 28 21:19:58 2011 +0100 -files: m4/ac_cxx_flags_preset.m4 -description: -Added an entry for the clang++ compiler (http://clang.llvm.org/). - - -changeset: 1863:d68ac0a95712 -user: patricg -date: Thu Jul 28 21:16:18 2011 +0100 -files: m4/ac_prog_doxygen.m4 -description: -Changed default behaviour to doxygen disabled. - - -changeset: 1862:691355238c25 -user: Patrik Jonsson -date: Fri Jul 22 12:18:54 2011 -0400 -files: blitz/array/ops.cc -description: -Added _bz_forceinline to array assignment operators. - - -changeset: 1861:85ea8a3b7b34 -user: Patrik Jonsson -date: Fri Jul 22 12:07:53 2011 -0400 -files: blitz/globeval.cc -description: -Commented out '#pragma forceinline recursive' in the evaluation functions, since it seems to increase compilation time catastrophically. - - -changeset: 1860:e89dadf02f56 -user: Patrik Jonsson -date: Fri Jul 22 11:36:25 2011 -0400 -files: blitz/Makefile.am -description: -Added tmevaluate.h to blitz_HEADERS. - - -changeset: 1859:ec1d33eba15b -user: Patrik Jonsson -date: Fri Jul 22 11:24:21 2011 -0400 -files: blitz/Makefile.am blitz/tinymat2.cc blitz/tinymat2.h blitz/tinyvec2.h blitz/tm2ops.cc blitz/tmevaluate.h blitz/tv2ops.cc blitz/tvevaluate.h -description: -Added compile-time selection of TinyVector-only expressions so the full evaluator is not instantiated in these cases. Ported the TinyVector-only evaluation machinery also for TinyMatrix, so TM expressions use the lightweight evaluation. (With some thinking, these two could probably be merged into one code.) With these changes, icpc v12 is now able to compile the multicomponent testcase. - - -changeset: 1858:c7413cdd3fac -user: Patrik Jonsson -date: Thu Jul 21 16:26:31 2011 -0400 -files: blitz/funcs.h -description: -pow2-8 now works also for complex scalars. 
- - -changeset: 1857:9a8d54c82f39 -user: Patrik Jonsson -date: Thu Jul 21 15:38:33 2011 -0400 -files: blitz/funcs.h -description: -Restored pow2-8 functionality for builtin types. - - -changeset: 1856:d0150e83962d -user: Patrik Jonsson -date: Thu Jul 21 13:38:50 2011 -0400 -files: configure.ac -description: -Configure now defines BZ_ALIGN_VARIABLE to just a standard declaration if a simd width is not specified, rather than use alignment 1 as that gives warnings. - - -changeset: 1855:494a2ee3abf9 -user: Patrik Jonsson -date: Wed Jul 20 14:27:13 2011 -0700 -files: blitz/globeval.cc -description: -Fixed compilation error with gcc, which apparently insists that fastRead_tv be explicitly template qualified. - - -changeset: 1854:f21b1f853e06 -user: Patrik Jonsson -date: Wed Jul 20 14:26:08 2011 -0700 -files: blitz/generate/genstencils.py -description: -Fixed a typo in the generated stencil ET code. - - -changeset: 1853:75e5322949b6 -user: Patrik Jonsson -date: Wed Jul 20 14:25:26 2011 -0700 -files: blitz/simdtypes.h -description: -Added inclusion of stdint.h to find uintptr_t. - - -changeset: 1852:1642b5d3f4df -user: Patrik Jonsson -date: Wed Jul 20 17:15:28 2011 -0400 -files: blitz/array/expr.cc blitz/indexexpr.h blitz/timer.h -description: -Fixed some compilation errors with gcc. - - -changeset: 1851:2cccdb4ade84 -user: Patrik Jonsson -date: Wed Jul 20 11:21:13 2011 -0400 -files: blitz/array/asexpr.h -description: -Renamed template parameter T_result in the results traits classes. - - -changeset: 1850:e61bfbb651fb -user: Patrik Jonsson -date: Tue Jul 19 15:25:27 2011 -0400 -files: blitz/array-impl.h -description: -Removed 'restrict' from the getInitializationIterator return type since it doesn't make sense. - - -changeset: 1849:dc958e1d2c5b -user: Patrik Jonsson -date: Tue Jul 19 15:24:29 2011 -0400 -files: blitz/Makefile.am -description: -Added tvevaluate.h to blitz_HEADERS. 
- - -changeset: 1848:fbe66ccb53ee -user: Patrik Jonsson -date: Tue Jul 19 15:23:36 2011 -0400 -files: blitz/simdtypes.h -description: -Fixed an error where some compilers complain about the '>1>' construct. - - -changeset: 1847:6e8b33f8b407 -user: Patrik Jonsson -date: Thu Jun 30 20:54:52 2011 -0700 -files: blitz/memblock.cc blitz/memblock.h blitz/simdtypes.h -description: -Fixed a bug in the shift-to-cache-line during memoryBlock allocation that would cause badness when T_numtype didn't fit evenly into a cache line (like for multicomponent Arrays.) - - -changeset: 1846:4d4602e6c605 -user: Patrik Jonsson -date: Thu Jun 30 22:16:33 2011 -0400 -files: blitz/tvevaluate.h -description: -Added tvevaluate.h which was forgotten in r1845. - - -changeset: 1845:d2b487a42b58 -user: Patrik Jonsson -date: Thu Jun 30 18:46:06 2011 -0400 -files: blitz/globeval.cc blitz/tinyvec2.cc blitz/tuning.h -description: -Moved the _tv_evalute_(un)aligned functions into a class _tv_evaluator for the purpose of avoiding meta-unrolling very long TinyVector expressions. It is advantageous to binary-unroll short array expressions and then use wider vector widths than the simd width itself during evaluation. Added defines BZ_MAX_BITS_FOR_BINARY_UNROLL and BZ_VECTORIZED_LOOP_WIDTH to tuning.h to control this. - - -changeset: 1844:4db0fbbcc036 -user: Patrik Jonsson -date: Thu Jun 30 14:18:23 2011 -0400 -files: blitz/memblock.cc -description: -Fixed typo in memblock.cc. - - -changeset: 1843:6049c40c2237 -user: Patrik Jonsson -date: Thu Jun 30 10:35:05 2011 -0700 -files: blitz/array-impl.h blitz/levicivita.h blitz/range.h -description: -Fixed a few more warnings. 
- - -changeset: 1842:981ac7763b8d -user: Patrik Jonsson -date: Thu Jun 30 10:16:39 2011 -0700 -files: blitz/tuning.h -description: -Fixed warning about redefined macros in tuning.h - - -changeset: 1841:2adbcdadf146 -user: Patrik Jonsson -date: Thu Jun 30 10:15:18 2011 -0700 -files: blitz/tuning.h -description: -Fixed warning about redefined macros in tuning.h - - -changeset: 1840:6e1433edffa5 -user: Patrik Jonsson -date: Thu Jun 30 09:50:15 2011 -0700 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop11.cpp benchmarks/floop12.cpp benchmarks/floop13.cpp benchmarks/floop14.cpp benchmarks/floop15.cpp benchmarks/floop16.cpp benchmarks/floop17.cpp benchmarks/floop18.cpp benchmarks/floop19.cpp benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop22.cpp benchmarks/floop23.cpp benchmarks/floop24.cpp benchmarks/floop25.cpp benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop5.cpp benchmarks/floop6.cpp benchmarks/floop8.cpp benchmarks/floop9.cpp benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop11.cpp benchmarks/loop12.cpp benchmarks/loop13.cpp benchmarks/loop14.cpp benchmarks/loop15.cpp benchmarks/loop16.cpp benchmarks/loop17.cpp benchmarks/loop18.cpp benchmarks/loop19.cpp benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop22.cpp benchmarks/loop23.cpp benchmarks/loop24.cpp benchmarks/loop25.cpp benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop5.cpp benchmarks/loop6.cpp benchmarks/loop8.cpp benchmarks/loop9.cpp blitz/generate/makeloops.py -description: -Removed the TinyVector version in the benchmark loops. Since we can't measure it reliably anyway, it seems more useful to run a denser set of array sizes. - - -changeset: 1839:d79f66a7f6e7 -user: Patrik Jonsson -date: Thu Jun 30 09:38:51 2011 -0700 -files: blitz/memblock.cc blitz/simdtypes.h blitz/tuning.h -description: -Turned on the align blocks on cache line option, as it makes a substantial difference at least on my machine. 
Moved the definitions of the L1 cache line size and critical memory block size for aligning to tuning.h. - - -changeset: 1838:06aa6f2889f6 -user: Patrik Jonsson -date: Wed Jun 29 12:27:50 2011 -0700 -files: blitz/array/ops.cc blitz/globeval.cc blitz/tinyvec2.cc blitz/tuning.h -description: -Added macro _bz_forceinline, which expands to __forceinline for icpc, and used that for the evaluation functions. Unfortunately, the directive doesn't actually do what it promises, leaving intermediate function calls. Asked Intel about this. - - -changeset: 1837:6c2b4079cfe6 -user: Patrik Jonsson -date: Wed Jun 29 10:39:20 2011 -0700 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop10f90.f90 benchmarks/floop11.cpp benchmarks/floop11f90.f90 benchmarks/floop12.cpp benchmarks/floop12f90.f90 benchmarks/floop13.cpp benchmarks/floop13f90.f90 benchmarks/floop14.cpp benchmarks/floop14f90.f90 benchmarks/floop15.cpp benchmarks/floop15f90.f90 benchmarks/floop16.cpp benchmarks/floop16f90.f90 benchmarks/floop17.cpp benchmarks/floop17f90.f90 benchmarks/floop18.cpp benchmarks/floop18f90.f90 benchmarks/floop19.cpp benchmarks/floop19f90.f90 benchmarks/floop1f90.f90 benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop21f90.f90 benchmarks/floop22.cpp benchmarks/floop22f90.f90 benchmarks/floop23.cpp benchmarks/floop23f90.f90 benchmarks/floop24.cpp benchmarks/floop24f90.f90 benchmarks/floop25.cpp benchmarks/floop25f90.f90 benchmarks/floop2f90.f90 benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop36f90.f90 benchmarks/floop5.cpp benchmarks/floop5f90.f90 benchmarks/floop6.cpp benchmarks/floop6f90.f90 benchmarks/floop8.cpp benchmarks/floop8f90.f90 benchmarks/floop9.cpp benchmarks/floop9f90.f90 benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop100f90.f90 benchmarks/loop10f90.f90 benchmarks/loop11.cpp benchmarks/loop11f90.f90 benchmarks/loop12.cpp benchmarks/loop12f90.f90 benchmarks/loop13.cpp benchmarks/loop13f90.f90 
benchmarks/loop14.cpp benchmarks/loop14f90.f90 benchmarks/loop15.cpp benchmarks/loop15f90.f90 benchmarks/loop16.cpp benchmarks/loop16f90.f90 benchmarks/loop17.cpp benchmarks/loop17f90.f90 benchmarks/loop18.cpp benchmarks/loop18f90.f90 benchmarks/loop19.cpp benchmarks/loop19f90.f90 benchmarks/loop1f90.f90 benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop21f90.f90 benchmarks/loop22.cpp benchmarks/loop22f90.f90 benchmarks/loop23.cpp benchmarks/loop23f90.f90 benchmarks/loop24.cpp benchmarks/loop24f90.f90 benchmarks/loop25.cpp benchmarks/loop25f90.f90 benchmarks/loop2f90.f90 benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop36f90.f90 benchmarks/loop5.cpp benchmarks/loop5f90.f90 benchmarks/loop6.cpp benchmarks/loop6f90.f90 benchmarks/loop8.cpp benchmarks/loop8f90.f90 benchmarks/loop9.cpp benchmarks/loop9f90.f90 blitz/generate/makeloops.py -description: -Fixed yet another bug in makeloops. - - -changeset: 1836:52fb01a72be8 -user: Patrik Jonsson -date: Wed Jun 29 10:11:57 2011 -0700 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop10f.f benchmarks/floop11.cpp benchmarks/floop11f.f benchmarks/floop12.cpp benchmarks/floop12f.f benchmarks/floop13.cpp benchmarks/floop13f.f benchmarks/floop14.cpp benchmarks/floop14f.f benchmarks/floop15.cpp benchmarks/floop15f.f benchmarks/floop16.cpp benchmarks/floop16f.f benchmarks/floop17.cpp benchmarks/floop17f.f benchmarks/floop18.cpp benchmarks/floop18f.f benchmarks/floop19.cpp benchmarks/floop19f.f benchmarks/floop1f.f benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop21f.f benchmarks/floop22.cpp benchmarks/floop22f.f benchmarks/floop23.cpp benchmarks/floop23f.f benchmarks/floop24.cpp benchmarks/floop24f.f benchmarks/floop25.cpp benchmarks/floop25f.f benchmarks/floop2f.f benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop36f.f benchmarks/floop3f.f benchmarks/floop5.cpp benchmarks/floop5f.f benchmarks/floop6.cpp benchmarks/floop6f.f benchmarks/floop8.cpp benchmarks/floop8f.f 
benchmarks/floop9.cpp benchmarks/floop9f.f benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop100f.f benchmarks/loop10f.f benchmarks/loop11.cpp benchmarks/loop11f.f benchmarks/loop12.cpp benchmarks/loop12f.f benchmarks/loop13.cpp benchmarks/loop13f.f benchmarks/loop14.cpp benchmarks/loop14f.f benchmarks/loop15.cpp benchmarks/loop15f.f benchmarks/loop16.cpp benchmarks/loop16f.f benchmarks/loop17.cpp benchmarks/loop17f.f benchmarks/loop18.cpp benchmarks/loop18f.f benchmarks/loop19.cpp benchmarks/loop19f.f benchmarks/loop1f.f benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop21f.f benchmarks/loop22.cpp benchmarks/loop22f.f benchmarks/loop23.cpp benchmarks/loop23f.f benchmarks/loop24.cpp benchmarks/loop24f.f benchmarks/loop25.cpp benchmarks/loop25f.f benchmarks/loop2f.f benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop36f.f benchmarks/loop3f.f benchmarks/loop5.cpp benchmarks/loop5f.f benchmarks/loop6.cpp benchmarks/loop6f.f benchmarks/loop8.cpp benchmarks/loop8f.f benchmarks/loop9.cpp benchmarks/loop9f.f blitz/generate/makeloops.py -description: -Fixed a bug in makeloops for loop expressions containing several statements. - - -changeset: 1835:3b5d96bde968 -user: Patrik Jonsson -date: Tue Jun 28 15:08:12 2011 -0700 -files: benchmarks/Makefile.am benchmarks/arrdaxpy.cpp benchmarks/arrdaxpy.m benchmarks/arrdaxpy2.m benchmarks/arrdaxpyf.f -description: -Made the arrdaxpy benchmark actually be included in the bench targets and compile. - - -changeset: 1834:c31ec13b787a -user: Patrik Jonsson -date: Tue Jun 28 14:34:11 2011 -0700 -files: benchmarks/acoustic.cpp -description: -Fixed flops parameter and some warnings in acoustic benchmark. 
- - -changeset: 1833:0825284cb582 -user: Patrik Jonsson -date: Tue Jun 28 13:41:12 2011 -0700 -files: blitz/array/expr.h blitz/array/stencil-et-macros.h blitz/array/stencil-et.h blitz/generate/genstencils.py blitz/globeval.cc -description: -Operations that can not use the vectorized fastRead_tv can now indicate this by setting minWidth=0. This will bypass the vectorized operations in _bz_evaluateWithUnitStride and go directly to the scalar path. Used this mechanism to turn of vectorization for stencils. Finished updating stencil classes with fastRead_tv. Tests with stencils now work again. - - -changeset: 1832:8ad1428d6b8d -user: Patrik Jonsson -date: Tue Jun 28 12:36:50 2011 -0700 -files: blitz/array/expr.h blitz/array/functorExpr.h blitz/array/map.h blitz/array/reduce.h blitz/array/stencil-et.h blitz/array/where.h blitz/generate/genstencils.py blitz/indexexpr.h blitz/levicivita.h blitz/range.h blitz/tv2fastiter.h -description: -Further cleanup of the now-unused tvtypeprop typedefs in the ET classes. Added fastRead_tv() and the tvresult classes to the stencil ET classes. Stencils are now attempted to be vectorized, but it's not clear how this functionality should work so at this point we get a precondition failure. Need to either work out how to do it or how to not vectorize them. - - -changeset: 1831:8e406dc4dd1c -user: Patrik Jonsson -date: Tue Jun 28 12:34:05 2011 -0700 -files: blitz/globeval.cc -description: -Rank>1 stack traversals now also use the evaluateWithUnitStride and evaluateWithCommonStride functions for the inner loop, if appropriate. This should enable vectorization for the inner loops of higher-rank expressions as long as they are unit stride. - - -changeset: 1830:f3114d86ba95 -user: Patrik Jonsson -date: Tue Jun 28 09:14:08 2011 -0700 -files: blitz/array/expr.h blitz/array/functorExpr.h blitz/array/where.h -description: -Cleaned up old fastRead_tv from readHelper classes. 
- - -changeset: 1829:469fb9bd9755 -user: Patrik Jonsson -date: Tue Jun 28 09:13:51 2011 -0700 -files: testsuite/loop1.cpp -description: -Updated loop1 test case to use new benchext interface. - - -changeset: 1828:7e076a476614 -user: Patrik Jonsson -date: Tue Jun 28 08:59:41 2011 -0700 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop11.cpp benchmarks/floop12.cpp benchmarks/floop13.cpp benchmarks/floop14.cpp benchmarks/floop15.cpp benchmarks/floop16.cpp benchmarks/floop17.cpp benchmarks/floop18.cpp benchmarks/floop19.cpp benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop22.cpp benchmarks/floop23.cpp benchmarks/floop24.cpp benchmarks/floop25.cpp benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop5.cpp benchmarks/floop6.cpp benchmarks/floop8.cpp benchmarks/floop9.cpp benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop11.cpp benchmarks/loop12.cpp benchmarks/loop13.cpp benchmarks/loop14.cpp benchmarks/loop15.cpp benchmarks/loop16.cpp benchmarks/loop17.cpp benchmarks/loop18.cpp benchmarks/loop19.cpp benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop22.cpp benchmarks/loop23.cpp benchmarks/loop24.cpp benchmarks/loop25.cpp benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop5.cpp benchmarks/loop6.cpp benchmarks/loop8.cpp benchmarks/loop9.cpp blitz/benchext.cc blitz/benchext.h blitz/generate/makeloops.py blitz/timer.h -description: -Benchext functionality with dependent variable now works correctly. Updated makeloops so loop benchmarks make use of this, and also added array tensor notation to the benchmarks. - - -changeset: 1827:85b6c418c275 -user: Patrik Jonsson -date: Mon Jun 27 15:32:51 2011 -0700 -files: benchmarks/acoustic.cpp -description: -Updated acoustic benchmark to use the benchext and run on several sizes. Not working right yet, though. 
- - -changeset: 1826:46a96a0bc719 -user: Patrik Jonsson -date: Mon Jun 27 15:32:28 2011 -0700 -files: blitz/benchext.cc blitz/benchext.h -description: -Changed the "rate decription" in benchext to explicitly setting the dependent variable. Depending on whether the timer is reporting seconds or cycles, it will then create the proper output quantity. Also added method currentImplementation() that returns the implementation currently being benchmarked, so it can easily be printed. - - -changeset: 1825:870827755b84 -user: Patrik Jonsson -date: Mon Jun 27 14:22:10 2011 -0700 -files: blitz/globeval.cc -description: -Fixed typo in globeval.cc. - - -changeset: 1824:729fe7dab4b8 -user: Patrik Jonsson -date: Mon Jun 27 14:21:58 2011 -0700 -files: benchmarks/daxpy.cpp benchmarks/tinydaxpy.cpp -description: -Updated daxpy and tinydaxpy tests to run with new ET classes. - - -changeset: 1823:979407299a71 -user: Patrik Jonsson -date: Mon Jun 27 14:21:06 2011 -0700 -files: blitz/timer.h src/globals.cpp -description: -Made timer ivar_ member a string instead of const char*. - - -changeset: 1822:6e69cd6bf77c -user: Patrik Jonsson -date: Mon Jun 27 14:15:01 2011 -0700 -files: benchmarks/acou3d.cpp benchmarks/acou3db4.cpp benchmarks/acoustic.cpp -description: -Updated acoustic benchmarks to work with new stencil operator names. - - -changeset: 1821:bf4d2dbfde7e -user: Patrik Jonsson -date: Mon Jun 27 14:13:50 2011 -0700 -files: blitz/timer.h src/globals.cpp -description: -Reverted timer to report seconds instead of us. 
- - -changeset: 1820:becc65b42c96 -user: Patrik Jonsson -date: Mon Jun 27 14:13:11 2011 -0700 -files: blitz/traversal.cc -description: -Fixed typo in traversal.cc - - -changeset: 1819:062688a03ef3 -user: Patrik Jonsson -date: Mon Jun 27 13:18:50 2011 -0700 -files: blitz/array/stencils.h testsuite/Makefile.am testsuite/stencil-extent.cpp -description: -Made stencilExtent member functions const so the automatic stencil extent detection works, and added testcase stencil-extent to test this. - - -changeset: 1818:513fd16d68d8 -user: Patrik Jonsson -date: Mon Jun 27 12:01:12 2011 -0700 -files: blitz/traversal.cc blitz/traversal.h blitz/vector2.h -description: -Added Vector default constructor and updated traversal functions to use new Vector instead of old. - - -changeset: 1817:18a93bbfc6e5 -user: Patrik Jonsson -date: Fri Jun 24 12:08:25 2011 -0700 -files: blitz/array-impl.h blitz/array/methods.cc blitz/memblock.h -description: -Added method blockLength() to MemoryBlockReference and made Array::storageSize() use that. Converted some comments to doxygen strings. - - -changeset: 1816:3268cc91465c -user: Patrik Jonsson -date: Thu Jun 23 17:30:24 2011 -0700 -files: blitz/array-impl.h blitz/array/funcs.h blitz/array/interlace.cc blitz/array/methods.cc blitz/array/resize.cc blitz/array/storage.h blitz/blitz.h blitz/memblock.h blitz/tinyvec2.h testsuite/chris-jeffery-2.cpp testsuite/contiguous.cpp testsuite/peter-bienstman-3.cpp testsuite/preexisting.cpp testsuite/transpose.cpp testsuite/troyer-genilloud.cpp -description: -Moved the specification of padded/unpadded storage to the GeneralArrayStorage object, and created shortcuts paddedArray and contiguousArray shortcuts. Preexisting memory constructors will now throw an exception if the specified array is noncontiguous. Tests were updated to specify contiguous arrays if they rely on knowing the strides. 
- - -changeset: 1815:25e0fde6666c -user: Patrik Jonsson -date: Thu Jun 23 17:30:02 2011 -0700 -files: blitz/simdtypes.h -description: -Removed specialization of simdTypes for bools as we can't adjust alignment anyway. - - -changeset: 1814:9ba706d744f5 -user: Patrik Jonsson -date: Thu Jun 23 14:42:32 2011 -0700 -files: blitz/array-impl.h blitz/array/interlace.cc blitz/array/methods.cc blitz/array/resize.cc blitz/array/storage.h blitz/blitz.h blitz/simdtypes.h blitz/tinyvec2.h configure.ac testsuite/contiguous.cpp testsuite/peter-bienstman-3.cpp testsuite/preexisting.cpp testsuite/transpose.cpp -description: -Added argument paddingPolicy to Array constructors and resize methods. The default value is set by the --enable-array-length-padding argument to configure. Reduced duplicated code in constructors. Restored tests that test storage to just specify unpadded storage. - - -changeset: 1813:a672a5056a2d -user: Patrik Jonsson -date: Thu Jun 23 13:42:24 2011 -0700 -files: testsuite/alignment.cpp -description: -Updated alignment test to check paddedLength, offsetToAlignment and _bz_meta_bitwidth. - - -changeset: 1812:2c9dd168eb7c -parent: 1808:7f1cd86bf252 -user: Patrik Jonsson -date: Thu Jun 23 13:36:16 2011 -0700 -files: blitz/simdtypes.h -description: -Added paddedLength() function to simdTypes, and rewrite offsetToAlignment to not do a modulo. Added _bz_meta_bitwidth metaprogram to calculate the number of bits needed to fit a number. - - -changeset: 1811:e8364831590c -branch: vectorization -parent: 1605:1a3d913c4c80 -user: Patrik Jonsson -date: Wed Jun 22 08:33:02 2011 -0700 -description: -Closed vectorization branch, this was only a proof-of-concept. - - -changeset: 1810:7e7b743d45dc -branch: compat-64bit -parent: 1662:243a3797fdec -user: Patrik Jonsson -date: Wed Jun 22 08:30:27 2011 -0700 -description: -Closed compat-64bit branch, as this has was merged in CVS. 
- - -changeset: 1809:115f8f0c4aa3 -branch: stencil-et -parent: 1684:f42d3e515fbd -user: Patrik Jonsson -date: Wed Jun 22 08:27:58 2011 -0700 -description: -Closed stencil-et branch, as this has was merged in CVS. - - -changeset: 1808:7f1cd86bf252 -parent: 1807:67c9701bd14b -parent: 1796:695e799bdc3f -user: Patrik Jonsson -date: Wed Jun 22 08:09:58 2011 -0700 -description: -Merge. - - -changeset: 1807:67c9701bd14b -user: Patrik Jonsson -date: Wed Jun 22 08:08:57 2011 -0700 -files: blitz/array-impl.h blitz/array/methods.cc configure.ac testsuite/contiguous.cpp testsuite/ctors.cpp testsuite/peter-bienstman-3.cpp testsuite/preexisting.cpp testsuite/transpose.cpp -description: -Added configure option --enable-array-length-padding as a switch for padding the minor rank of arrays to even SIMD widths. Added back test of uneven array sizes in contiguous test, and removed the warning printout from the tests that assumed they knew what the strides should be unless this option is enabled. - - -changeset: 1806:2be82135f511 -user: Patrik Jonsson -date: Wed Jun 22 07:58:17 2011 -0700 -files: testsuite/expression-slicing.cpp testsuite/safeToReturn.cpp -description: -Fixed safeToReturn and expression-slicing tests to not compare floating-point expressions for equality. - - -changeset: 1805:76b227678361 -parent: 1804:c5164c57d20b -parent: 1799:51e03b388db3 -user: Patrik Jonsson -date: Wed Jun 22 09:54:42 2011 -0400 -description: -Merged from governator. 
- - -changeset: 1804:c5164c57d20b -parent: 1803:0759f0ead3b9 -parent: 1798:f6f34b144649 -user: Patrik Jonsson -date: Wed Jun 22 09:50:55 2011 -0400 -files: blitz/applics.h blitz/array-old.h blitz/array/eval.cc blitz/benchext.cc blitz/extremum.h blitz/matdiag.h blitz/matexpr.h blitz/matgen.h blitz/mathf2.h blitz/matltri.h blitz/matref.h blitz/matrix.cc blitz/matrix.h blitz/matsymm.h blitz/mattoep.h blitz/matutri.h blitz/mstruct.h blitz/rand-dunif.h blitz/rand-mt.h blitz/rand-normal.h blitz/rand-tt800.h blitz/rand-uniform.h blitz/random.h blitz/randref.h blitz/tiny.h blitz/tinymat.h blitz/tinymatexpr.h blitz/tinymatio.cc blitz/tinyvec-et.h blitz/tinyvec.h blitz/tinyvecio.cc blitz/tinyveciter.h blitz/vecaccum.cc blitz/vecall.cc blitz/vecany.cc blitz/veccount.cc blitz/vecdelta.cc blitz/vecdot.cc blitz/vecexpr.h blitz/vecexprwrap.h blitz/vecglobs.cc blitz/vecglobs.h blitz/vecio.cc blitz/veciter.h blitz/vecmax.cc blitz/vecmin.cc blitz/vecnorm.cc blitz/vecnorm1.cc blitz/vecpick.cc blitz/vecpick.h blitz/vecpickio.cc blitz/vecpickiter.h blitz/vecproduct.cc blitz/vecsum.cc blitz/vector-et.h blitz/vector.cc blitz/vector.h blitz/vecwhere.h blitz/zero.cc blitz/zero.h test.cc -description: -Merged from governator. - - -changeset: 1803:0759f0ead3b9 -user: Patrik Jonsson -date: Wed Jun 22 09:50:16 2011 -0400 -files: blitz/benchext.cc blitz/benchext.h -description: -Benchext now uses long long integers to measure cycles instead of doubles. - - -changeset: 1802:dd0c79809d3c -parent: 1801:41a98caf5177 -parent: 1763:0532e5ff7ab5 -user: Patrik Jonsson -date: Wed Jun 15 13:08:51 2011 -0400 -description: -Merged makeloops. - - -changeset: 1801:41a98caf5177 -user: Patrik Jonsson -date: Wed Jun 15 13:08:15 2011 -0400 -files: blitz/benchext.cc -description: -Removed some debugging output from benchext. 
- - -changeset: 1800:1934801ac8af -parent: 1762:33cf8f76121c -user: Patrik Jonsson -date: Wed Jun 15 12:55:55 2011 -0400 -files: blitz/benchext.cc blitz/benchext.h blitz/timer.h -description: -Changed PAPI timer to use PAPI_read_counters, because stopping and starting them is very expensive. Also changed so benchext only uses one timer class, since they never run concurrently and it complicates PAPI use. - - -changeset: 1799:51e03b388db3 -user: Patrik Jonsson -date: Wed Jun 22 06:54:14 2011 -0700 -files: blitz/globeval.cc -description: -Fixed a typo in globeval. - - -changeset: 1798:f6f34b144649 -user: Patrik Jonsson -date: Tue Jun 21 20:59:58 2011 -0700 -files: blitz/benchext.cc -description: -Tweaked pylab graph. - - -changeset: 1797:1521c60b11a6 -parent: 1795:924b46cce6ab -user: Patrik Jonsson -date: Tue Jun 21 20:59:24 2011 -0700 -files: blitz/globeval.cc -description: -Tweaked unit stride evaluation after looking at float outputs. - - -changeset: 1796:695e799bdc3f -user: Paul P. Hilscher -date: Wed Jun 22 01:38:27 2011 -0400 -files: blitz/types.h -description: -Added types.h with typedefs for most famous array types (later also vectors) - - e.g. typedef Array Array3d; - - thus instead of writing - - void Func(Array A) { Array, 4> B; ... } - - we can also write - - void Func(Array3d A) { Array4z B; .... } - - -changeset: 1795:924b46cce6ab -user: Patrik Jonsson -date: Tue Jun 21 18:01:43 2011 -0700 -files: blitz/generate/makeloops.py -description: -Fixed bug in makeloops. - - -changeset: 1794:3da65fac47c5 -user: Patrik Jonsson -date: Tue Jun 21 17:45:14 2011 -0700 -files: blitz/generate/makeloops.py -description: -makeloops.py now generates loops for unaligned/misaligned array expressions. 
- - -changeset: 1793:ded58a3c9ac3 -user: Patrik Jonsson -date: Tue Jun 21 15:21:43 2011 -0700 -files: blitz/array/expr.h blitz/array/fastiter.h blitz/array/functorExpr.h blitz/array/map.h blitz/array/reduce.h blitz/array/stencil-et.h blitz/array/where.h blitz/globeval.cc blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Updated remaining expressions classes with minWidth/maxWidth members and new fastRead_tv template. Tests are now back to normal. - - -changeset: 1792:4042693d9d96 -user: Patrik Jonsson -date: Tue Jun 21 14:33:06 2011 -0700 -files: blitz/globeval.cc blitz/meta/vecassign.h blitz/tinyvec2.cc blitz/tinyvec2.h -description: -Major tweak to unit stride evaluation. Lowered overhead by meta-unrolling assignments for small arrays and sticking with unaligned accesses if the extra overhead in aligning isn't worth it. Also changed the chunked_updater to call _tv_evaluate directly instead of going through the assignment operator. This has increased performance for <10-element arrays significantly. However, there is now something going on with aligned arrays > 256 elements, where the performance drops by a factor of 2. - - -changeset: 1791:cc6a1aabe044 -user: Patrik Jonsson -date: Tue Jun 21 10:54:42 2011 -0700 -files: blitz/array/expr.h blitz/array/fastiter.h blitz/globeval.cc blitz/indexexpr.h blitz/levicivita.h blitz/range.h blitz/tv2fastiter.h testsuite/complex-test.cpp -description: -Changed fastRead_tv() to be a template so expressions can return vectorization units of different widths, chosen at evaluation, and make the expression classes return both the min and max simd width of their operands. Updated the unit stride evaluation to decide whether to do aligned or unaligned vectorization (though the actual aligned/unaligned routines remain to be written). 
- - -changeset: 1790:b74fb32f5843 -user: Patrik Jonsson -date: Tue Jun 21 10:52:03 2011 -0700 -files: testsuite/vectorization.cpp -description: -Added a test case vectorization to test that the vectorization routines do sensible things with weird expressions. - - -changeset: 1789:6a999035167c -user: Patrik Jonsson -date: Tue Jun 21 08:33:24 2011 -0700 -files: blitz/array/expr.h blitz/array/fastiter.h blitz/globeval.cc blitz/indexexpr.h blitz/levicivita.h blitz/range.h blitz/tv2fastiter.h -description: -Added simdWidth member to major expression classes. - - -changeset: 1788:88184b5be7f8 -user: Patrik Jonsson -date: Mon Jun 20 16:47:05 2011 -0700 -files: blitz/array/fastiter.h blitz/globeval.cc blitz/simdtypes.h blitz/tinymat2.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Debugged the evaluation loop for unaligned elements. Improved traverse debug messages. Fixed bug in simdTypes::offsetToAlignment and in other alignment checks. This has now broken complex-test, as it tries to vectorize it but doesn't get the type right. - - -changeset: 1787:8f8e91ae2bb2 -user: Patrik Jonsson -date: Mon Jun 20 14:32:22 2011 -0700 -files: testsuite/alignment.cpp -description: -Updated alignment test case to test the offsetToAlignment function. - - -changeset: 1786:e52d98b0127e -user: Patrik Jonsson -date: Mon Jun 20 14:31:11 2011 -0700 -files: blitz/array-impl.h blitz/array/expr.h blitz/array/fastiter.h blitz/array/functorExpr.h blitz/array/map.h blitz/array/reduce.h blitz/array/stencil-et.h blitz/array/where.h blitz/globeval.cc blitz/indexexpr.h blitz/simdtypes.h blitz/tv2fastiter.h -description: -Added vectorization of unaligned expressions by completing the initial scalar operations. This works as long as source and dest have identical misalignment. 
- - -changeset: 1785:b3bc1e1098e7 -user: Patrik Jonsson -date: Mon Jun 20 13:21:09 2011 -0700 -files: blitz/array-impl.h blitz/tinyvec2.h -description: -Added TinyVector constructor taking an expression. - - -changeset: 1784:c85903971cb4 -user: Patrik Jonsson -date: Mon Jun 20 13:20:32 2011 -0700 -files: blitz/array/asexpr.h -description: -Added BzReductionResult, BzIndexmapResult, BzStencilResult, and BzBinaryStencilResult traits classes. - - -changeset: 1783:ee4cd02aec30 -user: Patrik Jonsson -date: Sat Jun 18 18:45:25 2011 -0700 -files: blitz/tinyvec2.h -description: -Made TinyVector expression constructor explicit to avoid implicit conversion ambiguities. - - -changeset: 1782:d59002128cfb -user: Patrik Jonsson -date: Sat Jun 18 18:19:08 2011 -0700 -files: blitz/array/Makefile.am -description: -Added stencil-et-macros.h to installed files. - - -changeset: 1781:22c668d76b8c -user: Patrik Jonsson -date: Sat Jun 18 18:12:36 2011 -0700 -files: blitz/Makefile.am blitz/array/Makefile.am -description: -Fixed Makefile so all new files are installed. - - -changeset: 1780:c44f496f5545 -user: Patrik Jonsson -date: Sat Jun 18 18:12:11 2011 -0700 -files: blitz/tinyvec2.h testsuite/tinyvec.cpp -description: -Added tinyvec expression constructor. - - -changeset: 1779:46e8a4374790 -user: Patrik Jonsson -date: Sat Jun 18 12:54:57 2011 -0700 -files: blitz/array/functorExpr.h testsuite/Makefile.am testsuite/member_function.cpp -description: -Fixed a bug in the functorExpr macros that caused member function functors to use out-of-scope temporary copies of their parent objects if expressions were passed by value. 
- - -changeset: 1778:493d32bb7166 -user: Patrik Jonsson -date: Fri Jun 17 15:27:01 2011 -0700 -files: blitz/array-impl.h blitz/array/expr.h blitz/array/fastiter.h blitz/array/functorExpr.h blitz/array/map.h blitz/array/ops.cc blitz/array/reduce.h blitz/array/stencil-et.h blitz/array/where.h blitz/globeval.cc blitz/indexexpr.h blitz/memblock.h blitz/range.h blitz/simdtypes.h blitz/tinyvec2.cc blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Tweaks to lower the evaluation overhead. Evaluation functions are now members of a helper class _bz_evaluator that is templated based on rank. Extracted 1-d unit stride and common stride functions into separate functions. Simplified alignment and unit stride checks. Removed the non-USE_FAST_READ_EXPR code. This has increased the performance for small arrays by ~50%. - - -changeset: 1777:78209d59637b -user: Patrik Jonsson -date: Fri Jun 17 10:50:18 2011 -0700 -files: blitz/array-impl.h -description: -Removed declarations of the now-unused evaluation functions in Array. - - -changeset: 1776:173191ec4caf -user: Patrik Jonsson -date: Fri Jun 17 10:46:55 2011 -0700 -files: blitz/globeval.cc testsuite/alignment.cpp -description: -EvaluateWithStackTraversal1 now correctly deals with the ends of uneven-length arrays by using the non-vectorized update for those elements. The alignment test now checks that arrays report their alignment correctly and that the update doesn't overrun the end of the array. - - -changeset: 1775:e1600266eb9d -user: Patrik Jonsson -date: Fri Jun 17 09:37:49 2011 -0700 -files: blitz/array.cc -description: -array.cc now includes tinymat2.cc - - -changeset: 1774:3f8e4553b7b2 -user: Patrik Jonsson -date: Fri Jun 17 09:37:26 2011 -0700 -files: testsuite/qcd.cpp -description: -Updated qcd test to use ranlib. 
- - -changeset: 1773:57282f9614de -user: Patrik Jonsson -date: Fri Jun 17 09:29:46 2011 -0700 -files: test.cc -description: -Removed test.cc - - -changeset: 1772:f0858eb48bde -user: Patrik Jonsson -date: Fri Jun 17 11:36:56 2011 -0400 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop11.cpp benchmarks/floop12.cpp benchmarks/floop13.cpp benchmarks/floop14.cpp benchmarks/floop15.cpp benchmarks/floop16.cpp benchmarks/floop17.cpp benchmarks/floop18.cpp benchmarks/floop19.cpp benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop22.cpp benchmarks/floop23.cpp benchmarks/floop24.cpp benchmarks/floop25.cpp benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop5.cpp benchmarks/floop6.cpp benchmarks/floop8.cpp benchmarks/floop9.cpp benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop11.cpp benchmarks/loop12.cpp benchmarks/loop13.cpp benchmarks/loop14.cpp benchmarks/loop15.cpp benchmarks/loop16.cpp benchmarks/loop17.cpp benchmarks/loop18.cpp benchmarks/loop19.cpp benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop22.cpp benchmarks/loop23.cpp benchmarks/loop24.cpp benchmarks/loop25.cpp benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop5.cpp benchmarks/loop6.cpp benchmarks/loop8.cpp benchmarks/loop9.cpp blitz/Makefile.am blitz/generate/makeloops.py blitz/rand-dunif.h blitz/rand-mt.h blitz/rand-normal.h blitz/rand-tt800.h blitz/rand-uniform.h blitz/random.h blitz/randref.h testsuite/loop1.cpp -description: -More housecleaning. Removed the RNGs in blitz/ and redirected the loop benchmarks to use the ones in ranlib::. 
- - -changeset: 1771:49f4a83346b6 -user: Patrik Jonsson -date: Fri Jun 17 11:16:52 2011 -0400 -files: blitz/Makefile.am blitz/applics.h blitz/array-impl.h blitz/array-old.h blitz/array/Makefile.am blitz/array/eval.cc blitz/extremum.h blitz/generate/Makefile.am blitz/matdiag.h blitz/matexpr.h blitz/matgen.h blitz/mathf2.h blitz/matltri.h blitz/matref.h blitz/matrix.cc blitz/matrix.h blitz/matsymm.h blitz/mattoep.h blitz/matutri.h blitz/memblock.h blitz/mstruct.h blitz/tiny.h blitz/tinymat.h blitz/tinymatexpr.h blitz/tinymatio.cc blitz/tinyvec-et.h blitz/tinyvec.h blitz/tinyvecio.cc blitz/tinyveciter.h blitz/vecaccum.cc blitz/vecall.cc blitz/vecany.cc blitz/veccount.cc blitz/vecdelta.cc blitz/vecdot.cc blitz/vecexpr.h blitz/vecexprwrap.h blitz/vecglobs.cc blitz/vecglobs.h blitz/vecio.cc blitz/veciter.h blitz/vecmax.cc blitz/vecmin.cc blitz/vecnorm.cc blitz/vecnorm1.cc blitz/vecpick.cc blitz/vecpick.h blitz/vecpickio.cc blitz/vecpickiter.h blitz/vecproduct.cc blitz/vecsum.cc blitz/vector-et.h blitz/vector.cc blitz/vector.h blitz/vecwhere.h blitz/zero.cc blitz/zero.h src/globals.cpp -description: -HOUSE CLEANING. Removed most old files for the old ET machinery that are no longer needed. - - -changeset: 1770:292be3be8b3c -user: Patrik Jonsson -date: Thu Jun 16 20:31:23 2011 -0700 -files: blitz/array-impl.h testsuite/Makefile.am testsuite/preexisting.cpp -description: -Added a warning if an Array is constructed from preexisting data but the array is not contiguous. Added a testcase that exercises this. - - -changeset: 1769:3d1d9b901f0a -user: Patrik Jonsson -date: Thu Jun 16 20:03:20 2011 -0700 -files: testsuite/Makefile.am testsuite/alignment.cpp -description: -Created a test case checking that TinyVector and TinyMatrix are aligned properly. 
- - -changeset: 1768:727ee54426f5 -user: Patrik Jonsson -date: Thu Jun 16 19:43:34 2011 -0700 -files: blitz/array-impl.h blitz/array/methods.cc testsuite/contiguous.cpp testsuite/ctors.cpp testsuite/peter-bienstman-3.cpp testsuite/peter-nordlund-1.cpp testsuite/pthread.cpp testsuite/transpose.cpp -description: -Padded array minor rank to an even simd width. This changes strides and whether the array is contiguous, so necessitated changing some tests that assume they know what the stride should be. Also added array method storageSize() which returs the size of the memoryblock, since numElements is no longer a measure of how large that block is. - - -changeset: 1767:9d70f0a558d2 -user: Patrik Jonsson -date: Thu Jun 16 15:02:19 2011 -0700 -files: benchmarks/loop100f.f benchmarks/loop100f90.f90 -description: -Added loop100 fortran files. - - -changeset: 1766:5c64913a62cc -user: Patrik Jonsson -date: Thu Jun 16 15:01:43 2011 -0700 -files: benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop11.cpp benchmarks/floop12.cpp benchmarks/floop13.cpp benchmarks/floop14.cpp benchmarks/floop15.cpp benchmarks/floop16.cpp benchmarks/floop17.cpp benchmarks/floop18.cpp benchmarks/floop19.cpp benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop22.cpp benchmarks/floop23.cpp benchmarks/floop24.cpp benchmarks/floop25.cpp benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop5.cpp benchmarks/floop6.cpp benchmarks/floop8.cpp benchmarks/floop9.cpp benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop100.cpp benchmarks/loop11.cpp benchmarks/loop12.cpp benchmarks/loop13.cpp benchmarks/loop14.cpp benchmarks/loop15.cpp benchmarks/loop16.cpp benchmarks/loop17.cpp benchmarks/loop18.cpp benchmarks/loop19.cpp benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop22.cpp benchmarks/loop23.cpp benchmarks/loop24.cpp benchmarks/loop25.cpp benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop5.cpp benchmarks/loop6.cpp benchmarks/loop8.cpp benchmarks/loop9.cpp 
-description: -Regenerated loops. - - -changeset: 1765:0a1639f34c04 -user: Patrik Jonsson -date: Thu Jun 16 14:58:42 2011 -0700 -files: blitz/array/stencil-et.h -description: -Added numTV/TMoperands to _bz_StencilExpr2. - - -changeset: 1764:322758b11aca -user: Patrik Jonsson -date: Thu Jun 16 14:58:11 2011 -0700 -files: testsuite/tinyvec.cpp -description: -Fixed typo in tinyvec test. - - -changeset: 1763:0532e5ff7ab5 -user: Patrik Jonsson -date: Wed Jun 15 10:06:49 2011 -0700 -files: blitz/generate/makeloops.py -description: -Conditionally re-added the Vector benchmark in makeloops. - - -changeset: 1762:33cf8f76121c -user: Patrik Jonsson -date: Wed Jun 15 09:47:45 2011 -0700 -files: blitz/benchext.cc -description: -Fixed typo in pylab output. - - -changeset: 1761:9feccebd61cc -user: Patrik Jonsson -date: Tue Jun 14 09:36:23 2011 -0700 -files: blitz/benchext.cc -description: -Tweaked python graph in benchext. - - -changeset: 1760:8a7be5be87d2 -user: Patrik Jonsson -date: Mon Jun 13 11:59:02 2011 -0700 -files: benchmarks/Makefile.am -description: -Added loop100 target. - - -changeset: 1759:ff330fda93ec -user: Patrik Jonsson -date: Mon Jun 13 11:58:35 2011 -0700 -files: blitz/benchext.cc -description: -Benchext now sets negative results to zero. - - -changeset: 1758:9bb759a31af3 -user: Patrik Jonsson -date: Mon Jun 13 11:57:51 2011 -0700 -files: blitz/generate/makeloops.py -description: -Updated makeloops to generate loop100 and to generate F77 continuation lines if the expression is too long. - - -changeset: 1757:9586d37e6fda -user: Patrik Jonsson -date: Mon Jun 13 08:20:15 2011 -0700 -files: blitz/tinymat2.h blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Re-enabled fastRead_tv() for Tiny containers, since these are needed for mixed Array/tiny expressions. The chunked_updater prevents recursive instantiation loops now. 
- - -changeset: 1756:67fe2f750fb4 -user: Patrik Jonsson -date: Sun Jun 12 14:30:24 2011 -0700 -files: blitz/array/expr.h blitz/array/functorExpr.h blitz/array/map.h blitz/array/reduce.h blitz/array/stencil-et.h blitz/array/where.h blitz/benchext.cc blitz/globeval.cc blitz/indexexpr.h blitz/timer.h blitz/tinymat2.h blitz/tm2fastiter.h blitz/tuning.h blitz/tv2fastiter.h test.cc testsuite/loop1.cpp testsuite/qcd.cpp -description: -Added tvresult return type functionality to remaining expression classes. All tests now compile, and those that don't pass do so for different reasons. - - -changeset: 1755:34ffda55e5ce -parent: 1754:ed427d5babca -parent: 1750:b47f69881d33 -user: Patrik Jonsson -date: Sun Jun 12 16:57:23 2011 -0400 -description: -Merged from governator. - - -changeset: 1754:ed427d5babca -user: Patrik Jonsson -date: Sun Jun 12 16:56:27 2011 -0400 -files: blitz/generate/makeloops.py -description: -Removed nops from loops, increased iters for low numbers, and fixed declaration of sink(). - - -changeset: 1753:d17056f4b08b -user: Patrik Jonsson -date: Sun Jun 12 16:53:49 2011 -0400 -files: configure.ac -description: -Added check for PAPI library. - - -changeset: 1752:f73e13fa07f2 -user: Patrik Jonsson -date: Sun Jun 12 16:52:58 2011 -0400 -files: blitz/benchext.cc blitz/benchext.h -description: -Added reporting of instructions per cycle and total flops per cycle to benchmark if PAPI is used. - - -changeset: 1751:440bd8b86ed0 -parent: 1748:248ae0986c24 -user: Patrik Jonsson -date: Sun Jun 12 16:51:40 2011 -0400 -files: blitz/timer.h src/globals.cpp -description: -Fixed timer class to work with PAPI. - - -changeset: 1750:b47f69881d33 -user: Patrik Jonsson -date: Sun Jun 12 07:35:52 2011 -0700 -files: blitz/array/expr.h -description: -Fixed typo in UnaryExprOp. 
- - -changeset: 1749:4e0a0cf8b488 -user: Patrik Jonsson -date: Sun Jun 12 10:10:23 2011 -0400 -files: blitz/array-impl.h blitz/array/expr.h blitz/array/fastiter.h blitz/globeval.cc blitz/indexexpr.h blitz/levicivita.h blitz/range.h blitz/tinyvec2.h blitz/tv2fastiter.h blitz/update.h test.cc -description: -Added fastRead_tv() function to main expression classes and routed aligned Array expressions through it. Basically functional, but many things don't compile. - - -changeset: 1748:248ae0986c24 -user: Patrik Jonsson -date: Sat Jun 11 15:32:46 2011 -0700 -files: benchmarks/Makefile.am benchmarks/floop1.cpp benchmarks/floop10.cpp benchmarks/floop10f.f benchmarks/floop10f90.f90 benchmarks/floop11.cpp benchmarks/floop11f.f benchmarks/floop11f90.f90 benchmarks/floop12.cpp benchmarks/floop12f.f benchmarks/floop12f90.f90 benchmarks/floop13.cpp benchmarks/floop13f.f benchmarks/floop13f90.f90 benchmarks/floop14.cpp benchmarks/floop14f.f benchmarks/floop14f90.f90 benchmarks/floop15.cpp benchmarks/floop15f.f benchmarks/floop15f90.f90 benchmarks/floop16.cpp benchmarks/floop16f.f benchmarks/floop16f90.f90 benchmarks/floop17.cpp benchmarks/floop17f.f benchmarks/floop17f90.f90 benchmarks/floop18.cpp benchmarks/floop18f.f benchmarks/floop18f90.f90 benchmarks/floop19.cpp benchmarks/floop19f.f benchmarks/floop19f90.f90 benchmarks/floop1f.f benchmarks/floop1f90.f90 benchmarks/floop2.cpp benchmarks/floop21.cpp benchmarks/floop21f.f benchmarks/floop21f90.f90 benchmarks/floop22.cpp benchmarks/floop22f.f benchmarks/floop22f90.f90 benchmarks/floop23.cpp benchmarks/floop23f.f benchmarks/floop23f90.f90 benchmarks/floop24.cpp benchmarks/floop24f.f benchmarks/floop24f90.f90 benchmarks/floop25.cpp benchmarks/floop25f.f benchmarks/floop25f90.f90 benchmarks/floop2f.f benchmarks/floop2f90.f90 benchmarks/floop3.cpp benchmarks/floop36.cpp benchmarks/floop36f.f benchmarks/floop36f90.f90 benchmarks/floop3f.f benchmarks/floop3f90.f90 benchmarks/floop5.cpp benchmarks/floop5f.f benchmarks/floop5f90.f90 
benchmarks/floop6.cpp benchmarks/floop6f.f benchmarks/floop6f90.f90 benchmarks/floop8.cpp benchmarks/floop8f.f benchmarks/floop8f90.f90 benchmarks/floop9.cpp benchmarks/floop9f.f benchmarks/floop9f90.f90 benchmarks/loop1.cpp benchmarks/loop10.cpp benchmarks/loop10f.f benchmarks/loop10f90.f90 benchmarks/loop11.cpp benchmarks/loop11f.f benchmarks/loop11f90.f90 benchmarks/loop12.cpp benchmarks/loop12f.f benchmarks/loop12f90.f90 benchmarks/loop13.cpp benchmarks/loop13f.f benchmarks/loop13f90.f90 benchmarks/loop14.cpp benchmarks/loop14f.f benchmarks/loop14f90.f90 benchmarks/loop15.cpp benchmarks/loop15f.f benchmarks/loop15f90.f90 benchmarks/loop16.cpp benchmarks/loop16f.f benchmarks/loop16f90.f90 benchmarks/loop17.cpp benchmarks/loop17f.f benchmarks/loop17f90.f90 benchmarks/loop18.cpp benchmarks/loop18f.f benchmarks/loop18f90.f90 benchmarks/loop19.cpp benchmarks/loop19f.f benchmarks/loop19f90.f90 benchmarks/loop1f.f benchmarks/loop1f90.f90 benchmarks/loop2.cpp benchmarks/loop21.cpp benchmarks/loop21f.f benchmarks/loop21f90.f90 benchmarks/loop22.cpp benchmarks/loop22f.f benchmarks/loop22f90.f90 benchmarks/loop23.cpp benchmarks/loop23f.f benchmarks/loop23f90.f90 benchmarks/loop24.cpp benchmarks/loop24f.f benchmarks/loop24f90.f90 benchmarks/loop25.cpp benchmarks/loop25f.f benchmarks/loop25f90.f90 benchmarks/loop2f.f benchmarks/loop2f90.f90 benchmarks/loop3.cpp benchmarks/loop36.cpp benchmarks/loop36f.f benchmarks/loop36f90.f90 benchmarks/loop3f.f benchmarks/loop3f90.f90 benchmarks/loop5.cpp benchmarks/loop5f.f benchmarks/loop5f90.f90 benchmarks/loop6.cpp benchmarks/loop6f.f benchmarks/loop6f90.f90 benchmarks/loop8.cpp benchmarks/loop8f.f benchmarks/loop8f90.f90 benchmarks/loop9.cpp benchmarks/loop9f.f benchmarks/loop9f90.f90 test.cc -description: -Committed updated loop benchmarks and added versions using float. 
- - -changeset: 1747:144bf0c077b7 -user: Patrik Jonsson -date: Sat Jun 11 15:31:33 2011 -0700 -files: blitz/benchext.cc blitz/benchext.h blitz/generate/makeloops.py blitz/timer.h -description: -Added an untested implementation for the timer class to use PAPI if available, and for the benchmark class to accept time info in cycles or seconds. - - -changeset: 1746:b5406bbb53ae -user: Patrik Jonsson -date: Sat Jun 11 14:23:36 2011 -0700 -files: blitz/generate/makeloops.py -description: -Now generates correct loop benchmarks. Added float benchmarks, too. - - -changeset: 1745:ce5438656557 -user: Patrik Jonsson -date: Sat Jun 11 11:57:58 2011 -0700 -files: blitz/generate/makeloops.py -description: -Now writes all loops in loops.data. - - -changeset: 1744:5a4a4105c96f -user: Patrik Jonsson -date: Sat Jun 11 10:54:42 2011 -0700 -files: blitz/generate/makeloops.py -description: -Started a makeloops.py to replace the hard-to-edit makeloops.cpp. Now writes the .cpp file. - - -changeset: 1743:9de2c44d1b0e -user: Patrik Jonsson -date: Fri Jun 10 13:22:09 2011 -0700 -files: blitz/array-impl.h blitz/listinit.h -description: -Changed Array ListInitializer to use the array iterator, which fixes the problems in the initialize test case but breaks the storage one since the iterators are buggy. - - -changeset: 1742:b1be75056cc8 -user: Patrik Jonsson -date: Fri Jun 10 13:21:38 2011 -0700 -files: blitz/globeval.cc -description: -Fixed an erroneously commented out line. - - -changeset: 1741:a6d2118de27a -user: Patrik Jonsson -date: Fri Jun 10 10:44:04 2011 -0700 -files: blitz/array/functorExpr.h blitz/array/methods.cc blitz/array/reduce.h blitz/array/stencil-et.h blitz/globeval.cc blitz/memblock.cc blitz/memblock.h blitz/simdtypes.h blitz/tinymat2.h testsuite/loop1.cpp testsuite/troyer-genilloud.cpp -description: -Fixed some test failures introduced by the TinyVector alignment. 
- - -changeset: 1740:b1e1f01fe282 -user: Patrik Jonsson -date: Thu Jun 09 19:19:18 2011 -0700 -files: blitz/tuning.h test.cc -description: -Changed tuning for icpc to not unroll. It makes no diff to the performance. - - -changeset: 1739:26ee3e7ea757 -user: Patrik Jonsson -date: Thu Jun 09 18:35:15 2011 -0700 -files: benchmarks/loop3.cpp blitz/tinyvec2.cc blitz/tuning.h src/globals.cpp -description: -Improved the prevention of loop elimination in loop3 by making the sink() function extern. Added macro BZ_TV_EVALUATE_UNROLL_LENGTH to be defined if TinyVectors below some length should be unrolled, and set this option for icpc in tuning.h. icpc now has very good TV performance, but gcc kinda sucks. - - -changeset: 1738:e286af827d8e -user: Patrik Jonsson -date: Thu Jun 09 16:10:29 2011 -0700 -files: benchmarks/loop3.cpp blitz/benchext.cc blitz/benchext.h blitz/memblock.cc blitz/memblock.h blitz/simdtypes.h blitz/tinyvec.h blitz/tinyvec2.cc blitz/tinyvec2.h configure.ac m4/ac_cxx_alignment_directive.m4 m4/ax_prefix_config_h.m4 -description: -Merged the alignment changes from the cvs repo. (Grr...) - - -changeset: 1737:8226fb873368 -user: Patrik Jonsson -date: Thu Jun 09 16:04:44 2011 -0700 -files: blitz/globeval.cc -description: -Added shortcut evaluation for 1-element expressions of any rank. - - -changeset: 1736:d6062dec88de -user: Patrik Jonsson -date: Thu Jun 09 16:01:47 2011 -0700 -files: benchmarks/loop3.cpp -description: -Upped the number of iterations for small sizes to decrease noise. - - -changeset: 1735:31ad3c728214 -user: Patrik Jonsson -date: Thu Jun 09 15:00:13 2011 -0700 -files: blitz/meta/vecassign.h blitz/tinyvec2.cc -description: -Added an alternative to meta-unroll the TV evaluation. - - -changeset: 1734:cdaa0193fc68 -user: Patrik Jonsson -date: Thu Jun 09 14:59:11 2011 -0700 -files: blitz/range.h -description: -Added numTVoperands to Range. 
- - -changeset: 1733:908a72324c41 -user: Patrik Jonsson -date: Thu Jun 09 13:56:17 2011 -0700 -files: test.cc -description: -Changed test to a simple TV test. - - -changeset: 1732:deab9d6bea53 -user: Patrik Jonsson -date: Wed Jun 08 22:36:34 2011 -0400 -files: blitz/levicivita.h -description: -Added numTV/TMoperands to Levicivita object. - - -changeset: 1731:487b90783e8d -user: Patrik Jonsson -date: Wed Jun 08 22:33:54 2011 -0400 -files: blitz/array/asexpr.h blitz/array/expr.h blitz/array/fastiter.h blitz/array/map.h blitz/array/where.h blitz/indexexpr.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tm2fastiter.h blitz/tv2fastiter.h -description: -Added a TinyVector-specific evaluation function that kicks in for TV-only expressions and is much simpler. - - -changeset: 1730:89cf0320a448 -user: Patrik Jonsson -date: Wed Jun 08 14:05:07 2011 -0700 -files: benchmarks/loop3.cpp -description: -Tweaked loop3 test to include TinyVector. - - -changeset: 1729:bd0975bd5d2a -user: Patrik Jonsson -date: Wed Jun 08 13:21:26 2011 -0700 -files: blitz/array/functorExpr.h blitz/array/map.h blitz/array/reduce.h blitz/array/where.h blitz/indexexpr.h -description: -Updated shift functions for multicomponent functionality for functors, index expressions and mappings, reductions and where statements. - - -changeset: 1728:44b0d155ac28 -user: Patrik Jonsson -date: Wed Jun 08 12:21:22 2011 -0700 -files: blitz/array/expr.h blitz/indexpar.h -description: -Undid _bz_Indexpar stuff because it doesn't work and I don't know what's going on. - - -changeset: 1727:ed46b21750b4 -user: Patrik Jonsson -date: Wed Jun 08 11:55:58 2011 -0700 -files: blitz/array/expr.h blitz/indexpar.h -description: -Started converting use of BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE to the _bz_Indexpar class, but something's wrong. - - -changeset: 1726:7214fcf29e45 -user: Patrik Jonsson -date: Wed Jun 08 11:53:13 2011 -0700 -files: blitz/array/stencil-et.h -description: -Added comment. 
- - -changeset: 1725:c5319e4b1eea -user: Patrik Jonsson -date: Wed Jun 08 11:05:12 2011 -0700 -files: benchmarks/loop2.cpp benchmarks/loop3.cpp blitz/array-impl.h blitz/array/funcs.h blitz/array/map.h blitz/tinymat2.cc blitz/tinymat2.h blitz/tinyvec2.cc blitz/tinyvec2.h testsuite/levicivita.cpp testsuite/shape.cpp -description: -Levicivita object now works again. Container operator() and index mappings were updated to work with all containers. All tests except 'initialize' now pass. - - -changeset: 1724:d3dc59c5adb6 -user: Patrik Jonsson -date: Tue Jun 07 23:17:53 2011 -0400 -files: benchmarks/loop1.cpp blitz/benchext.cc blitz/vector2.h testsuite/initialize.cpp testsuite/loop1.cpp -description: -Added file vector2.h which declares Vector by deriving from Array. It doesn't have the same syntax as the old vector, though. - - -changeset: 1723:bedb9b347ded -user: Patrik Jonsson -date: Tue Jun 07 22:54:43 2011 -0400 -files: blitz/benchext.cc blitz/benchext.h testsuite/qcd.cpp -description: -Changed qcd benchmark to use rank-1 Arrays instead of Vectors. This also required changes to benchext. - - -changeset: 1722:e38757c62997 -user: Patrik Jonsson -date: Tue Jun 07 18:16:32 2011 -0400 -files: blitz/array/stencil-et-macros.h blitz/generate/genstencils.py testsuite/stencil-et.cpp -description: -Updated stencil-et-macros.h to not have C++-style comments in the macros. - - -changeset: 1721:fe724f8ca2d3 -user: Patrik Jonsson -date: Tue Jun 07 17:08:18 2011 -0400 -files: blitz/array/asexpr.cc blitz/array/expr.h blitz/array/fastiter.h blitz/array/multi.h blitz/array/stencil-et-macros.h blitz/array/stencilops.h blitz/generate/genstencils.py blitz/tinymat2.cc blitz/tinymat2.h blitz/tm2fastiter.h blitz/tv2fastiter.h test.cc testsuite/stencil-et.cpp -description: -Stencils now work, at least to the point that the tests using predefined stencils pass. 
This required adding the shift() methods to the type selection machinery, which remains to be done for the specialized ET classes, and changing the signatures of some stencil operators. Added TinyMatrix to multicomponent_traits. Also found a bug in the tv and tm copy iterators that caused iterators initialized from temporaries to be bad. - - -changeset: 1720:c0f10b956e41 -user: Patrik Jonsson -date: Mon Jun 06 17:41:41 2011 -0400 -files: blitz/generate/genstencils.py -description: -Added boilerplate multicomponent typedefs to stencil classes, but this is in many cases incorrect. The stencils that aren't simple element-wise operations need more thought. - - -changeset: 1719:45a26e6719a9 -user: Patrik Jonsson -date: Mon Jun 06 14:51:09 2011 -0400 -files: blitz/array.cc -description: -Fixed bad path to range.cc file in previous commit. - - -changeset: 1718:483f43690120 -user: Patrik Jonsson -date: Mon Jun 06 14:48:31 2011 -0400 -files: blitz/array.cc blitz/range.cc blitz/range.h test.cc testsuite/tvinitialize.cpp -description: -Added tvinitialize test case, and range.cc to the files included by array.cc. - - -changeset: 1717:d1aacd22a60d -user: Patrik Jonsson -date: Mon Jun 06 11:30:46 2011 -0700 -files: testsuite/tvinitialize.cpp -description: -Added test for TinyVector list initializer - - -changeset: 1716:fdb226333e76 -user: Patrik Jonsson -date: Mon Jun 06 14:28:36 2011 -0400 -files: blitz/array/expr.h blitz/array/functorExpr.h blitz/update.h testsuite/Makefile.am testsuite/multicomponent-2.cpp testsuite/patrik-jonsson-1.cpp testsuite/reduce.cpp -description: -Added multicomponent support to functors, and added testcase multicomponent-2 to test it. Added test of reductions of expressions (these were broken even before). Fixed dereferencing operators on ternary and quaternary expressions. - - -changeset: 1715:84d238c675e7 -user: Patrik Jonsson -date: Mon Jun 06 11:17:10 2011 -0400 -files: testsuite/safeToReturn.cpp -description: -Fixed safeToReturn testcase. 
- - -changeset: 1714:4be3fc66da39 -user: Patrik Jonsson -date: Mon Jun 06 11:05:13 2011 -0400 -files: blitz/array/expr.h blitz/array/map.h blitz/array/where.h testsuite/reduce.cpp -description: -Added reductions of expressions to reduce testcase and added first_value functions to expression classes. Unclear how to implement for index mapping. - - -changeset: 1713:49d1756cefcb -user: Patrik Jonsson -date: Sun Jun 05 22:52:08 2011 -0400 -files: blitz/array/asexpr.h blitz/array/expr.h blitz/array/reduce.h -description: -Expressions now respect returntype imposed by operator. - - -changeset: 1712:0fb16673a558 -user: Patrik Jonsson -date: Sun Jun 05 22:24:06 2011 -0400 -files: blitz/array-impl.h blitz/array/expr.h blitz/array/map.h blitz/array/ops.cc blitz/array/reduce.h blitz/array/where.h blitz/globeval.cc blitz/indexexpr.h testsuite/peter-nordlund-1.cpp -description: -Added multicomponent types for reductions, mappings and where. All tests that compile pass. - - -changeset: 1711:27726264ee65 -user: Patrik Jonsson -date: Sun Jun 05 18:37:28 2011 -0700 -files: testsuite/arrayinitialize.cpp -description: -Added test that list initializer is working. - - -changeset: 1710:f91aea787e4b -user: Patrik Jonsson -date: Sun Jun 05 17:40:24 2011 -0400 -files: blitz/array.cc blitz/array/expr.cc blitz/array/ops.cc blitz/tinymat2.cc blitz/tinymat2.h blitz/tinyvec2.cc blitz/tm2ops.cc testsuite/multicomponent.cpp -description: -Further tweaks to multicomponent: return types, updater types. Fleshed out multicomponent test case, which now passes. - - -changeset: 1709:d9fb47fca6aa -parent: 1708:ad4ee52d4118 -parent: 1701:0a7d38f46c73 -user: Patrik Jonsson -date: Sun Jun 05 16:00:56 2011 -0400 -description: -Merged changes from governator. 
- - -changeset: 1708:ad4ee52d4118 -user: Patrik Jonsson -date: Sun Jun 05 16:00:30 2011 -0400 -files: blitz/array/asexpr.h blitz/array/expr.h blitz/tinymat2.h blitz/tm2fastiter.h testsuite/multicomponent.cpp -description: -Added multicomponent machinery for ternary and quaternary expressions, and for TinyMatrix. - - -changeset: 1707:3eb872d83f25 -user: Patrik Jonsson -date: Sun Jun 05 15:19:33 2011 -0400 -files: blitz/array/ops.cc blitz/globeval.cc blitz/indexexpr.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tv2fastiter.h testsuite/multicomponent.cpp -description: -Now multi-multicomponent containers also work, at least as far as basic unary and binary expressions. - - -changeset: 1706:ac56148d2e28 -user: Patrik Jonsson -date: Sun Jun 05 13:00:54 2011 -0400 -files: blitz/array/asexpr.h blitz/array/expr.h testsuite/multicomponent.cpp -description: -Scalar expressions now appear to work, both POD scalars and scalar()-wrapped components. - - -changeset: 1705:5f2e985727da -user: Patrik Jonsson -date: Sun Jun 05 12:20:47 2011 -0400 -files: blitz/array/asexpr.h blitz/array/expr.h blitz/array/fastiter.h blitz/ops.h blitz/tinyvec2.h blitz/tv2fastiter.h testsuite/multicomponent.cpp -description: -Binary multicomponent expressions now also work. Scalars don't. - - -changeset: 1704:b2c4fbbcf808 -user: Patrik Jonsson -date: Sat Jun 04 22:41:41 2011 -0400 -files: blitz/array/asexpr.cc blitz/array/asexpr.h blitz/array/expr.h blitz/array/fastiter.h blitz/array/newet-macros.h blitz/array/ops.cc blitz/etbase.h blitz/globeval.cc blitz/levicivita.h blitz/ops.h blitz/range.h blitz/tv2fastiter.h blitz/update.h testsuite/multicomponent.cpp -description: -A lot of type fiddling to get operations on multicomponent arrays to work. Evaluating an expression on a container whose T_numtype is another container now returns a new expression with iterators that point to the subcomponents data. Introduced new types in expression classes and traits classes to get this to work. 
Only UnaryMinus is implemented currently. - - -changeset: 1703:fe2cef578df4 -parent: 1702:deaeeb8ae3cc -parent: 1700:f7c16a642396 -user: Patrik Jonsson -date: Fri Jun 03 22:55:51 2011 -0400 -description: -Merged in multicomponent test. - - -changeset: 1702:deaeeb8ae3cc -parent: 1699:532db168738e -user: Patrik Jonsson -date: Fri Jun 03 22:50:18 2011 -0400 -files: blitz/array-impl.h blitz/array.cc blitz/array/asexpr.cc blitz/array/asexpr.h blitz/array/expr.h blitz/array/stencilops.h blitz/array/where.h blitz/bounds.h blitz/et-forward.h blitz/range.cc blitz/range.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tv2fastiter.h blitz/tv2ops.cc blitz/tvecglobs.h test.cc -description: -More header file work. Range now fulfills the ET specifications. - - -changeset: 1701:0a7d38f46c73 -user: Patrik Jonsson -date: Sun Jun 05 12:58:42 2011 -0700 -files: blitz/tinymat2.cc -description: -Forgot to commit TinyMatrix implementation file before. - - -changeset: 1700:f7c16a642396 -user: Patrik Jonsson -date: Fri Jun 03 19:54:34 2011 -0700 -files: testsuite/multicomponent.cpp -description: -Started working on a test that multicomponent expressions work. - - -changeset: 1699:532db168738e -user: Patrik Jonsson -date: Fri Jun 03 17:22:24 2011 -0700 -files: blitz/array/asexpr.cc blitz/array/asexpr.h blitz/array/domain.h blitz/array/expr.cc blitz/array/expr.h blitz/array/fastiter.h blitz/array/funcs.h blitz/array/map.h blitz/array/ops.h blitz/array/reduce.h blitz/array/stencil-et.h blitz/et-forward.h blitz/indexmap-forward.h blitz/meta/vecassign.h blitz/prettyprint.h blitz/reduce.h blitz/shapecheck.h blitz/tinyvec2.h test.cc testsuite/Makefile.am testsuite/stencil-et.cpp -description: -Worked on making the header files include cleanly using forward declarations to avoid the messy include dependencies. 
- - -changeset: 1698:586e114347ea -user: Patrik Jonsson -date: Fri Jun 03 08:45:47 2011 -0700 -files: testsuite/tinyvec.cpp -description: -Updated tinyvec test to test correct application of functions. - - -changeset: 1697:42f97230ee60 -user: Patrik Jonsson -date: Thu Jun 02 22:25:32 2011 -0700 -files: blitz/array/ops.cc blitz/array/stencil-et-macros.h blitz/array/stencilops.h testsuite/extract.cpp testsuite/matthias-troyer-1.cpp testsuite/matthias-troyer-2.cpp -description: -Fixed typos in array/stencilops.h and restored the stencil ET macros in stencil-et-macros.h so users can define their own. 3 more tests now run, but there is a problem with expressions in multicomponent arrays that will take some thinking. - - -changeset: 1696:4889434a766b -user: Patrik Jonsson -date: Thu Jun 02 19:58:08 2011 -0700 -files: blitz/array/asexpr.h blitz/array/funcs.h blitz/globeval.cc blitz/levicivita.h blitz/tv2ops.cc testsuite/Makefile.am testsuite/levicivita.cpp testsuite/tinyvec.cpp testsuite/where.cpp -description: -Added global functions dot() and cross() to funcs.h. These work on all types of expressions. The cross product is done with an expression object LeviCivita which represents the Levi-Civita symbol in 3d. Added a test case for the cross product and augmented tinyvec test. - - -changeset: 1695:26cab2b56941 -user: Patrik Jonsson -date: Thu Jun 02 13:17:33 2011 -0700 -files: blitz/array-impl.h blitz/array/fastiter.h blitz/array/stencil-et.h blitz/array/stencilops.h blitz/generate/Makefile.am blitz/generate/genstencils.py blitz/tinymat2.h testsuite/tinymat.cpp -description: -Restored stencil functionality. Removed the long stencil macros in stencil-et.h, since they make the generated code impossible to debug, and instead made a python script genstencils.py that generates the file stencil-classes.cc. 
- - -changeset: 1694:c89e0d6018aa -user: Patrik Jonsson -date: Wed Jun 01 20:58:09 2011 -0700 -files: blitz/array/asexpr.h blitz/tinymat2.h blitz/tinymat2io.cc blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tinyvec2io.cc blitz/tm2fastiter.h blitz/tm2ops.cc blitz/tv2fastiter.h -description: -Cranked out an implementation of TinyMatrix that at least works internally. Need to sort out header dependencies. - - -changeset: 1693:d4382faa1756 -user: Patrik Jonsson -date: Wed Jun 01 11:12:43 2011 -0700 -files: blitz/array-impl.h blitz/array/asexpr.h blitz/array/fastiter.h blitz/array/ops.cc blitz/array/reduce.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tv2ops.cc test.cc testsuite/Makefile.am testsuite/initialize.cpp testsuite/reduce.cpp testsuite/tinyvec.cpp -description: -Further tweaks to make tests pass. All tests that compile now pass. Stencils remain, as do defining dot product for new expressions. - - -changeset: 1692:363fc19b1e4a -user: Patrik Jonsson -date: Tue May 31 21:44:59 2011 -0700 -files: blitz/array-impl.h blitz/array/domain.h blitz/array/functorExpr.h blitz/array/reduce.cc blitz/array/reduce.h blitz/array/where.h blitz/globeval.cc blitz/indexexpr.h blitz/tinyvec2.h -description: -Changed includes from tinyvec.h to tinyvec2.h. Several tweaks to make testsuite compile. Still not there though. - - -changeset: 1691:86c3627f3114 -user: Patrik Jonsson -date: Tue May 31 21:27:59 2011 -0700 -files: blitz/array-impl.h blitz/array/asexpr.h blitz/array/expr.h blitz/array/fastiter.h blitz/array/map.h blitz/array/ops.h blitz/array/reduce.h blitz/array/slice.h blitz/array/storage.h blitz/array/where.h blitz/globeval.cc blitz/ops.h blitz/ranks.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tinyvec2io.cc blitz/tv2fastiter.h blitz/tv2ops.cc blitz/tvecglobs.h test.cc -description: -Array now uses the new TinyVector in tinyvec2.h in place of the old one. A few things are nonfunctional because they depend on TinyMatrix. 
- - -changeset: 1690:1dad478e2fd7 -user: Patrik Jonsson -date: Tue May 31 18:43:06 2011 -0700 -files: blitz/array/fastiter.h blitz/array/map.h blitz/array/slice.h blitz/globeval.cc blitz/indexexpr.h blitz/indexmap-forward.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tv2fastiter.h test.cc -description: -Moved TinyVector2 expressions back to the iterator, as just forwarding fastRead allows vectorization. Added support for TV2 index expressions. - - -changeset: 1689:25a35896ffb4 -user: patrik@governator.ucsc.edu -date: Tue May 31 16:41:03 2011 -0700 -files: blitz/array/expr.h blitz/array/fastiter.h blitz/globeval.cc blitz/tinyvec2.h blitz/tv2fastiter.h -description: -Changed so TinyVector2 expressions use the TV2 objects directly and not the iterators. This allows the loops to be vectorized; otherwise the 'dereference is too complex'. - - -changeset: 1688:a7f9abef3017 -user: Patrik Jonsson -date: Tue May 31 16:52:07 2011 -0400 -files: test.cc -description: -Small test program. - - -changeset: 1687:2a507de3ea58 -user: Patrik Jonsson -date: Tue May 31 16:32:58 2011 -0400 -files: blitz/array-impl.h blitz/array/ops.cc blitz/globeval.cc blitz/tinyvec2.h blitz/tv2fastiter.h blitz/tv2ops.cc -description: -Make the Array evaluate() function a global one that works on any container that has the required methods. That way it also works for TinyVector2. - - -changeset: 1686:36b9e6e658a2 -user: Patrik Jonsson -date: Tue May 31 12:46:03 2011 -0400 -files: blitz/array/fastiter.h blitz/constpointerstack.h blitz/tinyvec2.cc blitz/tinyvec2.h blitz/tinyvec2io.cc blitz/tv2assign.h blitz/tv2fastiter.h blitz/tv2ops.cc -description: -Bare-bones implementation of TinyVector2 using a FastTV2iterator for expressions. 
- - -changeset: 1685:1946604d4724 -parent: 1599:50fc41194253 -user: convert-repo -date: Fri May 27 00:13:58 2011 +0000 -files: .hgtags -description: -update tags - - diff --git a/ChangeLog.0 b/ChangeLog.0 deleted file mode 100644 index 31218efd..00000000 --- a/ChangeLog.0 +++ /dev/null @@ -1,303 +0,0 @@ -- fixed problem with += and tensor indices, thanks Adnene Ben Abdallah -- fixed problem in testsuite/wei-ku-1 -- changed "egcs" to "gcc" in documentation, configure scripts -- incorporated patch from Julian Cummings for missing preexistingMemoryPolicy - parameter in Vector constructor -- when constructing an array from an expression, use the storage - format of the first array in the expression (rather than always - using C-style storage) (bug reported by Peter Bienstman) -- patch by Theodore Papadopoulo for max() on arrays of non-positive - floating-point numbers -- updated email addresses, web pages in source files -- incorporated code by Adam Levar for input - of tinyvecs -- changed BZ_DISABLE_XOPEN_SOURCE to BZ_ENABLE_XOPEN_SOURCE; it is - disabled now by default (it caused problems for HP and Solaris) -- added 1-D convolutions -- fixed core dump in testsuite/Olaf-Ronneberger-1, found by Wei Ku -- fixed namespace problems found by Martin Reinecke and Wei Ku -- fixed bug found by Wei Ku in loop collapse optimizations -- fixed bug found by Michael Aivazis in the array io; missing - "return os;" in operator<<(ostream,Array) -- fixed bug found by Peter Bienstman where expression involving - empty arrays would loop forever -- SGI C++ 7.3 now supported (thanks Bill Homer) -- 'make clean' now removes blitz/config.h and config.cache -- fixed problem with polar(x,y) in compiler/compmath.cpp -- added real persistence: operator<< to output N-dimensional arrays - to ostreams, operator>> to input N-dimensional arrays from istreams -- can now construct arrays from array expressions -- in theory, C++ Builder 4 is now supported. 
Oleg Orlov found 2 bugs - which have now been fixed. -- stencils are expression-templatized!! This means you can use stencils - in array expressions, e.g. A = Laplacian2D(B) / (h*h); -- updated documentation, index added to postscript version -- new random number generators library: uniform, normal, exponential, - beta, gamma, chisquare, F, discrete uniform (see random/ and the new - chapter in the user guide) -- added new partial/full 1D reduction: last(X) returns index where - X is last true -- support for egcs __restrict__ -- incorporated Allan Stoke's Intel C++ patches -- new div, grad, curl, difference operators on vector fields; stencil - patterns shown graphically in documentation -- new expression templates implementation is on by default now -- should work on all platforms now; - I've included a hacked-up version of from libstdc++ - (blitz/limits-hack.h) -- Mersenne Twister RNG; see blitz/rand-mt.h; adapted by Allan Stokes -- nicer syntax for constructing Fortran-style arrays: - Array A(3,3,fortranArray); -- fixed reductions minIndex and maxIndex for multidimensional - arrays; thanks Peter Nordlund -- faster compile times -- ConstArrayIterator, ArrayIterator: usable STL-style forward - iterators for Arrays -- new math functions for complex arrays: arg, conj, polar - (real and imag were provided in an earlier release) -- when creating an array from pre-existing data, now have an - option of duplicateData, deleteDataWhenDone, neverDeleteData -- added user-defined expression template functions: see - examples/useret.cpp -- added Array::reindex(), reindexSelf() contributed by - Derrick Bass -- fixed bugs in Array::isStorageContiguous(), thanks Matthias - Lindström -- fixed bug in stencils related to automatic determination - of spatial extent; another due to integer literals -- fixed major bug in reverseSelf() -- fixed memory leak when creating arrays from pre-existing data - (thanks Matthias Troyer and Pierre-Alain Genilloud) -- removed finite and trunc math 
functions (these apparently are - only available under AIX?); thanks Prem Anand Manmohanrao -- incorporated several patches by Tim Brunne: typos in - benchmarks/acousticf.f and benchmarks/acousticf2.f; guarded - inclusion in benchmarks/acoustic.cpp; removed $^ - in testsuite/Makefile (not recognized by osf make); -- fixed bug w/ scoping of IEEE math functions -- added -fno-gcse to default options for egcs, will maybe minimize - memory gobbling bug in egcs -- fixed problems with egcs and OSF related to _XOPEN_SOURCE_EXTENDED - (thanks to Petter Urkedal for finding this mysterious bug) -- fixed problem with compiler/compmth2.cpp and compiler/compmath.cpp - (thanks Andy Jewell) -- moved blitz/minmax.h into its own namespace (blitz::minmax) to avoid - conflicts in blitz/array/reduce.h; thanks Peter Nordlund -- fixed problem in array/stencilops.h, thanks Dr. Josef Grosch -- fixed problem with Array<T> when T does not have trivial - ctors/dtors; patches by Petter Urkedal - -Version 0.4, August 6, 1998 -- new benchmark results on a variety of platforms; see - http://monet.uwaterloo.ca/blitz/benchmarks/ -- indirection for arrays: A[point-list], A[subdomain-list], - A[indexSet(indexlist1,indexlist2,...)] -- new stencil objects, make finite differencing much nicer -- computational fluid dynamics example (currently broken) -- ported and tuned for DECcxx -- central, forward, and backward difference stencil operators -- curl, div, grad, laplacian stencil operators -- support for vector fields/stencil objects (vector versions of curl, - div, grad) -- BZ_ENUM_CAST kludge -- fixed problems with math functions and namespaces -- implemented RectDomain -- more tidying in blitz/compiler -- loop unrolling is now OFF by default -- incorporated patches from Theo Papadopoulo for setting compiler - names and flags, and blas path -- vastly improved documentation -- added documentation for numeric inquiry functions - -Version 0.3 alpha 01, April 12 1998 -- added where(A,B,C) for arrays; as a 
replacement for ?: operator -- zip(expr1, expr2, T_component) to "zip" a multicomponent expression - from two scalar-valued expressions -- added real(A) and imag(A) for complex arrays; can be used as lvalues -- added Array::operator[](int), and ::chopComponent(), support for - multicomponent arrays -- Implemented Array::reverse() -- Implemented Array::resizeAndPreserve() -- fixed various bugs found by aCC (thanks Len Lattanzi) -- beefed up compiler/namespac.cpp; egcs is just good enough to pass - the old test but still doesn't handle full namespaces -- added section on multicomponent arrays to documentation -- split complex math functions into two parts; some of the ANSI-required - functions are not provided by KAI C++ -- added new array constructor to create an array from pre-existing data, - with specified strides -- added Array::free() to delete an array's data -- updated the documentation: global functions, platforms, fixed many - errors -- cycleArrays has been changed from a method of Array to a - global function. This may break existing code. See manual - for details (under "Global functions") -- The great source code reorganization has started. arrayexpr.h, - arraymap.h have become array/expr.h, array/map.h, etc. This should - make no difference to user-level code. -- NB: the semantics of transpose() have been changed! This may break - existing code. Now have transpose() and transposeSelf(); reverse() and - reverseSelf(). See manual for details. -- Fixed oversight in type promotion: if one type is user-defined and the - other is an intrinsic type, then promote to the user-defined type -- Implemented blitz::min(a,b) and blitz::max(a,b), with type promotion - (blitz/minmax.h) -- Fixed bug with makeUnique() and arrays created from pre-existing data -- Fixed bugs associated with IEEE/SYSV math functions. Some functions - were called IEEE in one place and SYSV in another, which caused many - problems. 
-- Fixed bug in compiler/instant.cpp; incorrect syntax for explicit - instantiation request -- Fixed bug in benchmarks/looptest.cpp due to unrolling backwards - loops incorrectly -- added PhysicalConstants.h and SystemOfUnits.h from CERN, by - Michel Maire. These should be supplemented with more constants - from e.g. CRC handbook. ** Update: these won't be included until - a code sharing agreement with CERN is signed. ** - -Version 0.2 alpha 06, April 12 1998 -- serious makeover of the Blitz++ web pages -- Blitz++ development list (blitz-dev) started; see - http://seurat.uwaterloo.ca/blitz/contribute/ -- interlaced arrays seriously implemented: interlaceArrays() and - allocateArrays(). The first always interlaces; the second only - interlaces if it's advantageous for the architecture (controlled - by the BZ_INTERLACE_ARRAYS flag in ) -- new type promotion mechanism -- fixed use of bool (instead of _bz_bool) in -- various minor bug fixes -- included benchmarks/looptest.cpp: tests a variety of C loop styles - to find which one gives best performance. This will be a start for - automatic tuning someday. -- added lots of comments to to explain stack - iteration -- finally, stable compiles with EGCS -- honour BZ_HAVE_NAMESPACES in examples and benchmarks -- will now build into a separate directory (thanks Brendan Kehoe) -- integrated GNU autoconf and configure utilities; many thanks to - John W. 
Eaton and Brendan Kehoe for their help -- exciting new benchmark results: see web page -- stable compiles with egcs-980328 -- bzconfig script will now run in non-interactive mode -- archive now unpacks into blitz-YYMMDD, instead of just Blitz++ -- fixed bug related to globals in -- fixed bug in unrolling of 1-D array expressions with common, non-unit - stride , added check to test suite -- ColumnMajor changed to ColumnMajorArray to avoid conflict with - matrix class of the same name - -Version 0.2 alpha 05 March 13 1998 -- Blitz++ is now distributed under the terms of the GNU General Public - License -- solid EGCS support (see http://egcs.cygnus.com/) -- unfortunately, Cray C++ support broken by requiring ; this can - be fixed if anyone really wants it. -- libblitz.a now contains global instances (so far, just tensor index - objects) -- one-step installation if using KAI C++ (cd Blitz++; make) -- various problems with multiple module programs fixed (I hope) -- partial integration with Tau profiling tools - http://www.cs.uoregon.edu/research/paracomp/tau/ -- total reductions -- arrays with different index sets may no longer be used in the - same expression (e.g. adding a base-1 array to a base-0 array). - This causes ambiguities for expressions such as sum(A+B+i)-- from - which array should the index i take its values? 
-- shape checking for array expressions (in debug mode only) -- added pretty printing for array expressions; will be used for - tracing and profiling, shape checking -- fixed bug in loop collapse optimizations which broke examples/storage.cpp -- minor changes to documentation -- added ColumnMajor array storage order -- full implementation of Array::resize() -- Array::permute() renamed to transpose() -- duplicate Array member function deprecated: length() -- start of automated test suite - -Version 0.2 alpha 04 September 1997 -- EGCS port (see http://www.cygnus.com/egcs/) -- many new performance benchmarks for vector & array operations -- more tuning of vector & array operations -- several bug fixes - -Version 0.2 alpha 03 September 1997 -- Intel C++ port (Windows'95 and NT). Compiler test suite won't work; - just copy to -- inlining groups (BZ_INLINE_GROUP1, BZ_INLINE_GROUP2) in - can be used to disable inlining of certain operations -- in evaluating array expressions, multidimensional loops are collapsed - to 1D when possible -- automatic tiling for two-dimensional array stencils -- Array ctor to create arrays from pre-existing data (e.g. 
Fortran arrays) -- new benchmark: stencil.cpp (3D, 7-pt stencil) -- new benchmark: loops.cpp (measures performance of various C loops) -- new benchmark: acou3d.cpp (3-D PDE) -- bug fix for subarrays of subarrays -- common & unit stride optimizations for array expressions -- new example: transform.cpp -- indirection now used for innermost loop of array expressions; is faster - than pointer increments -- new benchmark: acoustic.cpp (2-D PDE) -- list initializers for Vector -- fixed bug in Vector::reverse() -- optimizations for PDEs: allocateArrays() (for interlaced arrays), - cycleArrays() (for convenient array relabelling) -- compiler test suite now generates a log file - -Version 0.2 alpha 02 August 1997 -- Several additions to compiler test suite -- Cray T3E port (Cray C++ 3.0.0.0) -- Honour absence of , , ; needed for Cray & SGI -- Compatibility with old for scoping rules (needed for Cray C++) -- Array::permute(..) implemented -- cross products for TinyVector -- New example: creating arrays of user types -- Updated documentation -- New reduction: first(expr, index) returns the first index value for - which expr evaluates true - -Version 0.2 alpha 01 July 1997 -- New to this release: Array -- added -- fixed bug in TinyVector::length() -- added TinyVector(x1,x2,x3,...,xn) constructors - -Version 0.1 alpha 04b -- fixed bug with index() in VectorPick -- fixed fortran compatibility problems in benchmarks -- added qcd and haney benchmarks to distribution -- added check in examples/erf.cpp; this example won't compile - without BZ_HAVE_IEEE_MATH - -Version 0.1 alpha 04 -- sum(), product() metaprograms for TinyVector -- min(), minValue(), minIndex(), max(), maxValue(), maxIndex() - for TinyVector (loops not unravelled) -- norm(), norm1() for TinyVector (loop not unravelled) -- Added any(), all(), and count() for vector expressions -- Fixed bug in Vector::makeUnique() -- Revised random number generators to take a template parameter - for the uniform generator -- Added
discrete uniform generator, in - -Version 0.1 alpha 03 February 1, 1997 -- Added TinyVector class & expression templates support -- Added preconditions for mean(..) in -- Tidied many #include directives to include header files only - if necessary (#ifndef BZ_xxx ... #include ... #endif) - Should reduce preprocessing time. - -Version 0.1 alpha 02 January 27, 1997 -- improved documentation -- complex operands in expression templates are templated, rather than - providing different specializations for float, double, long double -- full implementation of where(X,Y,Z) for vectors -- wrote new Benchmark class with external control model; in -- fixed error in return type of mean() in -- changed debug flag in to BZ_DEBUG (from just DEBUG) -- added some comments - -Version 0.1 alpha 01 January 24, 1997 -- alpha release: Vector, expression templates, VectorPick, - Random, Random, Benchmark & Timer - diff --git a/ChangeLog.1 b/ChangeLog.1 deleted file mode 100644 index a0f68a12..00000000 --- a/ChangeLog.1 +++ /dev/null @@ -1,3573 +0,0 @@ - -2009-11-23 Patrik Jonsson - - * blitz/array-impl.h, blitz/indexexpr.h, blitz/range.h, - blitz/tvecglobs.h, blitz/array/domain.h, blitz/array/eval.cc, - blitz/array/fastiter.h, blitz/array/functorExpr.h, - blitz/array/methods.cc, blitz/array/slicing.cc, - blitz/array/stencil-et.h, blitz/array/where.h, blitz/meta/dot.h, - blitz/meta/product.h, blitz/meta/sum.h, config/mdate-sh: Check-in - of the previously posted 64-bit patch to allow >2^31 elements in - an array. - -2009-10-20 Patrick Guio - * Makefile.am: Force to run a make in the blitz directory before - make in the lib directory to ensure that all the targets in blitz - subdirectories (like blitz/generate) are up-to-date. - -2009-08-21 Patrick Guio - - * blitz/generate/Makefile.am: Cleaned targets, rules and dependencies. - -2009-08-17 Patrick Guio - - * blitz/Makefile.am blitz/array/Makefile.am: Added generated headers - (genheaders) to `clean-local' target.
- * blitz/generate/Makefile.am: Added EXTRA_PROGRAMS dependency to the - all-am target. - -2009-06-29 Patrick Guio - - * configure.ac: Added call to set up directory `m4' as an additional - local Autoconf macro directory. - Test availability of `makeinfo' and set up an automake conditional. - * doc/Makefile.am: Set up `makeinfo' targets only when command available. - * doc/examples/slicing.cpp: fixed use of toEnd (fromStart and - toEnd are not defined within the Range class any longer). - - -2009-04-24 Theodore Papadopoulo - * blitz: array-impl.h array/methods.cc: add const qualifier to the - transpose method. - -2009-03-30 Theodore Papadopoulo - * blitz/array/iter.h: Replace uses of dataFirst() by data() to remove - iteration errors on e.g. reversed arrays. Corrected checks for this case. - * testsuite/slice-iterators.cpp: Add a test. - -2009-03-26 Patrick Guio - - * blitz/: matbops.h, mathfunc.h, matuops.h, promote-old.h, - vecbfn.cc, vecbops.cc, vecuops.cc, vecwhere.cc, array/bops.cc, - array/uops.cc: - Files removed from repository as they are automatically generated by codes - located in blitz/generate directory. - -2009-03-26 Patrick Guio - - * blitz/: matbops.h, mathfunc.h, matuops.h, promote-old.h, - vecbfn.cc, vecbops.cc, vecuops.cc, vecwhere.cc, array/bops.cc, - array/uops.cc: - Last changes registered before the files are removed from the repository - and copied to the repository Attic. - -2009-03-26 Patrick Guio - - * blitz/generate/genvecuops.cpp: - Typo fix. - -2009-03-26 Patrick Guio - - * blitz/generate/: bzfstream.h, genarrbops.cpp, genarruops.cpp, - genmatbops.cpp, genmathfunc.cpp, genmatuops.cpp, genpromote.cpp, - genvecbfn.cpp, genvecbops.cpp, genvecuops.cpp, genvecwhere.cpp: - Added cvs `revision' keyword of the generator code into the generated - file. - -2009-03-26 Patrick Guio - - * blitz/generate/genmathfunc.cpp: - Updated to generate the current revision of header .
- The changes in revision 1.13 and 1.14 of the header - were done on the file itself and not by the code generator, which should - not have happened since is supposed to be generated. - The change introduced in revision 1.14 (include ) has not been - reflected in the code generator since it is already included in - . - -2009-03-25 Patrick Guio - - * blitz/generate/: arroperands.h, genmathfunc.cpp, operands.h, - operands2.h: - Removed unnecessary include header . - -2009-03-25 Patrick Guio - - * blitz/generate/: arroperands.h, operands.h, operands2.h: - Explicitly include header for declaration of C string functions - strlen() and strcmp(). - -2009-03-24 Patrick Guio - - * blitz/generate/genmathfunc.cpp: - Explicitly include header for declaration of C string functions - strlen() and strcmp(). - -2009-03-10 Julian Cummings - - * blitz/applics.h: Clean up applicator typedefs and standardize - usage with apply method. All applicator types now return the type - T_numtype. Removed all typedefs of T_promote here, as only - T_numtype is needed. - * blitz/vecexpr.h: Extract return type T_numtype for _bz_VecExprOp - from T_op type. - * testsuite/where.cpp: Add a quick test of where operation on - TinyVector. - * blitz/vector.h: Added include of vecwhere.h header file for - where operation on vector types. - - -2008-08-06 Julian Cummings - * doc/random.texi: Corrected example code in manual illustrating - use of Normal RNG. - - -2008-05-28 Julian Cummings - * blitz/array/slicing.cc: Add extra parentheses around - conditionals involving && operator to eliminate warnings from gcc - 4.3 compiler about ambiguities. Patch provided by Volker Braun - . - * blitz/range.h: Add extra parentheses around conditionals - involving && operator to eliminate warnings from gcc 4.3 compiler - about ambiguities. Patch provided by Volker Braun - . - * testsuite/Makefile.am: Replace patrik-jonsson-2 test with the - zeek-1 test from Eric Zeek.
It tests the same Array construction - from a partial reduction, but checks the results. - - -2008-05-24 Theodore Papadopoulo - * blitz/array/reduce.h: Removed an off-by-one bug introduced with - my previous patch of 2008/05/02. - * testsuite/patrik-jonsson.cpp: Changed to patrik-jonsson-1.cpp. - * testsuite/patrik-jonsson-2.cpp: New test contributed by - Patrik Jonsson. - * testsuite/Makefile.am: Updated correspondingly. - -2008-05-07 Theodore Papadopoulo - * array/functorExpr.h array/stencil-et.h: Yet some more constification - that were missed. - * testsuite/patrik-jonsson.cpp: Added a new test contributed by - Patrik Jonsson to exercise array functors. - -2008-05-07 Patrick Guio - - * Makefile.am: Fixed minor problem detected by Erik Zeek. The file - blitz-uninstalled.pc is for the build tree and should not be installed. - -2008-05-03 Theodore Papadopoulo - - * blitz/array/expr.h: Some more constification. - * blitz/array/where.h: Constification of the ascending, ordering, lbound - and ubound methods that had been overlooked with the previous patch. - * testsuite/where.cpp: Added a test to trigger the problem detected - by Patrick Guio that is corrected with this patch. - -2008-05-02 Theodore Papadopoulo - - * blitz/minmax.h blitz/array/resize.cc blitz/array/stencils.cc - blitz/array/stencils.h testsuite/minmax.cpp: - Renamed the namespace from minmax to extrema to keep the name minmax - for a function name. - * blitz/reduce.h: Simplify and constify the file. Introduced - a new type MinMaxValue and the mechanic to have a minmax reduce - function. Changed the canProvideInitialValue into needInit and clarified - its use. Simplified drastically the design by removing unused constructors - and methods (reset). This potentially makes compilation errors instead of - run time ones, which is preferable. - * blitz/array/where.h: Remove commented out line. - * blitz/array/reduce.h: Introduce the minmax reduce function.
Generalized - the _bz_ReduceReset to really take into account the new needInit attribute - which formerly was sketched by canProvideInitialValue, but was not really - used. Constification of the ascending, ordering, lbound and ubound methods. - Simplified drastically the design by removing unused constructors and - methods (reset). This potentially makes compilation errors instead of - run time ones, which is preferable. - * blitz/array/expr.h: Constification of the ascending, ordering, lbound - and ubound methods. Added the first_value method used with the needInit - attribute (see blitz/array/reduce.h). - * blitz/array/map.h: Constification of the ascending, ordering, lbound - and ubound methods. - * blitz/array/reduce.cc: Commonize the code of _bz_reduceWithIndexTraversal - and _bz_reduceWithIndexVectorTraversal by the introduction of - _bz_reduceWithIndexTraversalGeneric, of the adapter _bz_IndexingVariant - and the use of _bz_ReduceReset. Simplified slightly the code by using - returns, which allows for the suppression of the loopFlag variable. - * doc/arrays-expr.texi: Update documentation. - * testsuite/reduce.cpp: Add a test for minmax. - -2008-05-01 Theodore Papadopoulo - * blitz/funcs.h: Constification. - -2008-04-25 Julian Cummings - - * Makefile.am: Add new COPYRIGHT document containing the BSD - copyright language to the list of distribution files. Also add - the zip file of VS2005.NET support files (should have done this a - while ago). - - -2008-04-22 Julian Cummings - * blitz/range.h: Convert enum for bounds values fromStart and - toEnd to standard integral constant type, so that we can be more - flexible for 64-bit support. Eliminate the superfluous copy of - fromStart and toEnd defined within the Range class. Use - standardized integral types throughout the Range class. - * random/F.h: Add missing blitz:: namespace qualifier to huge() - and tiny(). - * random/beta.h: Add missing blitz:: namespace qualifier to huge() - and tiny().
- - -2008-04-09 Julian Cummings - - * random/mt.h: Eliminate warnings about signed/unsigned - comparisons by using size_type and difference_type as defined - by the std::vector class. - -2008-04-06 Theodore Papadopoulo - - * blitz/array/iter.h: Update the iterator_category to be - of type bidirectional_iterator_tag since this is implemented - since 2007-10-02. - -2008-04-06 Theodore Papadopoulo - - * examples/rand2.cpp: Correct the call to seed that was wrong - since the 2008-02-19 modification. - -2008-02-20 Julian Cummings - - * blitz/array/methods.cc: Optimizations for reference counting and - mutex locking as suggested by Patrik Jonsson . - Add weakReference() method that makes the Array reference another - Array, but without any reference counting. This is meant for cases - in which the Array reference is local to a thread and the - referenced Array is global and persists beyond the lifetime of the - thread. Also replaced MemoryBlockReference with the - T_base typedef in a few places. - * blitz/array-impl.h: Optimizations for reference counting and - mutex locking as suggested by Patrik Jonsson . - Add threadLocal() method that allows the user to declare an Array - as being local to this thread, meaning that there is no need to - perform mutex locking on the reference count for the Array's - memory block. Also replaced MemoryBlockReference with - the T_base typedef in a few places. - * blitz/memblock.cc: Removed declaration of static - NullMemoryBlock. - * blitz/memblock.h: Optimizations for reference counting and mutex - locking as suggested by Patrik Jonsson . - Eliminate use of static NullMemoryBlock and UnownedMemoryBlock - entirely. These cases are handled by not allocating the block_ - pointer. Introduce a locking policy boolean flag that the user - can modify that enables or disables mutex locking of the block's - reference count, which can speed up cases in which an Array is - local to a thread.
Also moved several MemoryBlockReference - methods into the protected and private areas to prevent unwanted - use from outside the Array class. - - -2008-02-19 Julian Cummings - * random/Makefile.am: Add new file mtparam.cc to list of random - headers. - * random/discrete-uniform.h: Correct a glitch from previous - commit. - * src/globals.cpp: Patch for parallel random number generator - implementation and updated seed routines from Patrik Jonsson - . The parameter sets in random/mtparam.cc - were calculated by members of the CUDA GPU programming forum using - the dcmt0.4 library. - * blitz/mathfunc.h: Include cstdlib header file for gcc 4.3 - compatibility to get declaration of std::abs(int) function. - * blitz/funcs.h: Include cstdlib header file for gcc 4.3 - compatibility to get declaration of std::abs(int) function. - - -2008-02-20 Patrick Guio - - * random/exponential.h random/normal.h: - Added missing default constructors for ExponentialUnit and - NormalUnit classes. - * random/uniform.h: - Removed unnecessary semi-colon at the end of the definition of - the inlined default constructor of UniformClosedOpen class. - -2007-12-03 Patrick Guio - * doc/arrays-expr.texi doc/arrays-io.texi doc/arrays-slicing.texi: - Replaced statement @strong{Note:} by @strong{Caution:} as it is - mentioned in the texinfo documentation: "Do not use @strong with - the word ‘Note’; Info will mistake the combination for a cross - reference". - * doc/examples/Makefile.am doc/stencils/Makefile.am: Removed .texi - and .out files from the default all-am target. Those are generated - only if a documentation is requested using one of the targets for - document (info, dvi, html, ps, pdf and install-* forms). - * doc/doxygen/Makefile.am: Added dvi target. - * configure.ac: Set automake option no-installinfo to prevent - default build and install of doc/blitz.info target as - makeinfo might not be installed. In order to build/install - doc/blitz.info use make targets info/install-info.
- * doc/doxygen/Doxyfile.in: Updated to doxygen version 1.5. - * m4/ac_prog_doxygen.m4: Required doxygen version 1.5. - - -2007-11-20 Patrick Guio - * doc/arrays-expr.texi: Fixed code that triggered errors with - makeinfo --html. - * configure.ac: Test availability of texi2html and set up an - automake conditional. - * doc/Makefile.am: Removed html ps and pdf files from install - target. Instead use the targets install-html, install-ps and - install-pdf as described in automake doc. Removed generated - files from the EXTRA_DIST variable. Use texi2html only when - available, otherwise makeinfo --html (if available). - -2007-10-02 Julian Cummings - - * benchmarks/iter.cpp: Call end() method only once per loop. - * testsuite/iter.cpp: Added code to test operator--(). - * blitz/array-impl.h: Call the new constructor for end iterator, - which has an extra dummy int argument. Also made some format - changes for consistency. - * blitz/array/iter.h: Added decrement operator-- as suggested by - Theodore Papadopoulo and - improved debugging code. - - -2007-09-25 Julian Cummings - * blitz/blitz.h: Remove implicit definition of BZ_THREADSAFE. - This is now handled by the configure script (or equivalently by - defining BZ_THREADSAFE explicitly in the compiler-specific - config.h header or on the compile command line). Also fixed - inconsistent line endings in this file. - * configure.ac: Add configure option --enable-threadsafe to turn - on Blitz thread-safety features by defining the BZ_THREADSAFE - macro. If enabled, code will look for pthreads, OpenMP or Windows - thread support to implement mutex locking of sensitive variables. - VS.NET users must define BZ_THREADSAFE manually in the - blitz/ms/config.h file. - - -2007-09-19 Julian Cummings - * blitz/memblock.h: Remove include of pthread.h header here. - Thread support is now handled only in the blitz.h header. 
- * blitz/blitz.h: Added patch from David Blankenship to provide - blitz mutex support and thread safety with OpenMP or Windows - threads, in addition to the original support for pthreads. Define - _OPENMP to use OpenMP support. - * blitz/array/domain.h: Rename function argument "bounds" to avoid - conflict with type blitz::bounds, which is defined in - blitz/array/expr.h. - * blitz/array/expr.h: Rename function argument "pair" to avoid a - conflict with type std::pair, which is imported into the blitz - namespace. - - -2007-09-05 Julian Cummings - * testsuite/tinyvec.cpp: Added some testing of dot(), product(), - and sum() functions for tinyvector. - * blitz/Makefile.am: Add new header file vecproduct.cc to Makefile - list. - * blitz/vecglobs.cc: Include new file blitz/vecproduct.cc for - definitions of product() function. - * blitz/tvecglobs.h: Use BZ_SUMTYPE macro to obtain the proper - return type for tinyvector sum. - * blitz/meta/sum.h: Use BZ_SUMTYPE macro to obtain the return type - for tinyvector sum. - * blitz/array/convolve.cc: Simple fix suggested by William - Gallafent (william@gallaf.net) to allow convolution code to work - when elements are multicomponent types. - - -2007-05-29 Theodore Papadopoulo - * benchmarks/iter.cpp: Added. - * benchmarks/Makefile.am: Added the test iter.cpp. Simplify the - Makefile. - * blitz/compiler.h: New macros BZ_LIKELY and BZ_UNLIKELY. These - are currently nops and are just used as markers indicating tests - for which giving hints to the compiler improves the speed and/or - code simplification. - * blitz/array/iter.h: Change the iterator algorithm. This removes - the need for ConstPointerStack which is moved to fastiter.h and - gives an iterator class that is smaller, simpler and has a small - speed advantage. - * blitz/array/fastiter.h: Moved the helper class ConstPointerStack - to here as its sole use is now in this file. 
- -2007-07-16 Julian Cummings - - * blitz/bzdebug.h: Clean up standard header file includes and use - std qualifier on items from standard C++ library such as cerr and - endl. - * blitz/blitz.h: Clean up standard header file includes a bit. - * testsuite/testsuite.h: Move include of stdlib.h header to - blitz/bzdebug.h. Include std qualifier on exit() function call. - * testsuite/theodore-papadopoulo-1.cpp: Correct expected return - type for call to max() function. - * blitz/range.h: Add an exception to the check for stride evenly - dividing into the range, if the range is open-ended (i.e., if - first_==fromStart or last_==toEnd). - * examples/complex-test.cpp: Use double-precision floating-point - math consistently. - * examples/matmult.cpp: Use single-precision floating-point math - consistently. - * blitz/array/map.h: Change return type of operator* from int to - T_numtype to eliminate compiler warning about type - conversion. Note that this method should never be called in - practice, so the return type is not important. - -2007-05-31 Julian Cummings - - * blitz/range.h: Set Range tag toEnd to the value INT_MAX rather - than INT_MIN, so that it is distinct from the value of the tag - fromStart. - -2007-05-29 Theodore Papadopoulo - * m4/ac_cxx_math_absint_in_namespace_std.m4: New test that verifies - whether abs(integer_type) requires the use of cstdlib and namespace - qualification. - * m4/ac_cxx_have_cstring.m4: New test that checks the availability - of cstring. - * m4/ac_cxx_standard_library.m4: Added calls to the two new tests. - * blitz/tinyvec.h: Include cstring if available (needed for memcpy). - * blitz/compiler.h: Added macros for accessing the abs(int) functions. - * blitz/funcs.h: Use the new macros of compiler.h. - * blitz/mathfunc.h: Use the new macros of compiler.h. 
- -2007-03-16 Julian Cummings - - * blitz/veciter.h: Correct minor logic bug in operator+(int) - method for VectorIter and VectorIterConst to allow return of an - "end" iterator with zero length. This fixes a reported failure - with the blitz testsuite code initialize.cpp. - -2007-03-12 Julian Cummings - - * blitz/compiler.h: Corrected minor error in definition of - BZ_TEMPLATE_DEFAULT macro. - -2006-12-22 Julian Cummings - - * blitz/veciter.h: Forgot to add constructor for VecIterConst that - takes data pointer, stride, and length parameters. This is needed - by operator+(). - - * blitz/veciter.h: Added precondition check for operator+ in - VecIter class and added this operator to VecIterConst class as - well. -2006-09-27 Julian Cummings - - * blitz/array/map.h: Modified ordering() method for - ArrayIndexMapping class to ignore ordering result from a 1d source - Array. A 1d Array will always report an ordering of 0 for - whichever rank matches the index mapping, but this is not relevant - to the ordering of the final array expression, which may be - multidimensional. Basically, you cannot have a unique ordering - unless you have more than one dimension in the Array, so we ignore - the arbitrary ordering value of 0 and return INT_MIN as a flag - instead. - -2006-09-13 Julian Cummings - - * m4/ac_fortran_flags_preset.m4: Add -fno-second-underscore flag - for PathScale Fortran compilers so that the Fortran external - symbol translation is in the expected form. This allows the blitz - benchmark codes to compile properly under PathScale. - * blitz/bzconfig.h: Changed PathScale compiler test to look for - __PATHCC__ instead of __PATHSCALE__ and move this test above the - test for GCC, so that the compiler is properly identified. - Changes suggested by Patrik Jonsson (patrik@ucolick.org). 
-2006-08-22 Patrick Guio - - * m4/ac_compiler_specific_header.m4: Revived Fujitsu - compiler case $CXX==*CC* and $target==*fujitsu* - and deactivated the other case $CXX==*FCC* (which implies - that the Fujitsu compiler FCC running on a non-Fujitsu - target machines would not be recognised). - - -2006-08-22 Patrick Guio - - * m4/ac_compiler_specific_header.m4: Deactivated temporarily - redundant following statements for $CXX==*CC* and - $target==*fujitsu*: - AX_PREFIX_CONFIG_H([blitz/fujitsu/bzconfig.h],[BZ]) - COMPILER_SPECIFIC_HEADER="fujitsu/bzconfig.h" - that triggered an error when rebuilding configure with - autoreconf ver 2.60. - - -2006-05-31 Julian Cummings - - * examples/io.cpp: Minor change to format of program output to - enhance readability. - * blitz/matrix.cc: Make Matrix output format similar to that of 2D - Array. We output the extents of the Matrix first, followed by a - space-delimited list of elements enclosed in brackets. There is a - newline at the end of each row of elements. Added a compatible - input operator as well. - * blitz/matrix.h: Added declaration of operator>> for inputting - Matrix from an istream. - * blitz/vecio.cc: Make Vector output format similar to 1D Array. - We write out the Vector length, followed by a space-delimited list - of elements enclosed in brackets. Added a compatible input - operator. - * blitz/vector.h: Added declaration of operator>> for inputting a - Vector from an istream. - * blitz/array/io.cc: Array i/o format updates from Sergei - Mingaleev (mingaleev@gmail.com). We now print the complete Array - bounds (not just the extents) before the Array data, so that an - Array being read in can reset its indices to match the bounds - rather than assuming zero-based indexing. The Array data is given - in a space- delimited list enclosed in brackets, with a newline at - the end of each row of elements. This format is more compact and - hopefully more readable.
- * blitz/array-impl.h: Removed the declaration of the operator<< - specialization for N_rank=2. - * blitz/tinymatio.cc: TinyMatrix i/o updates from Sergei Mingaleev - (mingaleev@gmail.com) to output TinyMatrix as a comma-separated - list of elements enclosed in parentheses. The rows are separated - by a semicolon. The number of rows and columns are no longer - written out, since you cannot resize a TinyMatrix dynamically - anyway. - * blitz/tinyvecio.cc: TinyVector i/o updates from Sergei Mingaleev - (mingaleev@gmail.com) to output TinyVector as a comma-separated - list of elements enclosed in parentheses. The vector length is no - longer written out, since you cannot resize a TinyVector - dynamically anyway. This format makes a 2D TinyVector of doubles - look the same as a complex, which can be useful in some - situations. -2006-05-19 Julian Cummings - - * blitz/array/reduce.cc: In the implementation of the full array - reduction, set the initial value to the lower bound of the valid - index range in the case of minIndex() or maxIndex(). This will - correct a problem in the case where all elements are equal to the - min or max value as reported by numeric_limits. - * blitz/array/reduce.h: Added a functor _bz_ReduceReset that - passes an initial index value to the reset() method of the - reduction operator if it will accept one. This functor is used in - the implementation of the partial array reduction to set the - initial value to the lower bound of the valid index range in the - case of minIndex() or maxIndex(). This will correct a problem in - the case where all elements are equal to the min or max value as - reported by numeric_limits. - * blitz/reduce.h: Modified ReduceMinIndex and ReduceMaxIndex (and - the Vector versions of these) to accept an initial value for the - result index. This allows us to set the default result to - something reasonable in the event that no minimum or maximum value - is found. 
-2006-05-17 Julian Cummings - - * blitz/reduce.h: Corrected setting of canProvideInitialValue in - ReduceMin operator. -2006-02-03 Julian Cummings - - * m4/ac_compiler_specific_header.m4: Add support for recognition - of Fujitsu CC compiler. - * m4/ac_cxx_flags_preset.m4: Update Cray C++ compiler flag - presets. - * m4/ac_fortran_flags_preset.m4: Add flag presets for Fujitsu frt - compiler. Rename Cray ftn compiler and update flag presets. - Allow for some additional compiler name variants with PathScale - and PGI Fortran compilers. - * configure.ac: Add Fujitsu compiler frt to the F77 and F90 - compiler search lists. Replace old Cray fort77 compiler name with - new name ftn, which does both F77 and F90. Rearrange search lists - to push older g77 compiler and other obsolete compilers towards - the end. We prefer vendor-supplied compilers where available. In - most cases, user will set the compiler names anyway. - * benchmarks/loop1.cpp: Make clear that first argument to - initializeRandomDouble function should be a data pointer (i.e., an - ordinary C array). - -2006-01-22 Patrick Guio - - * doc/arrays-storage.texi: Fixed a typo in section 2.9.1, - ColumnMajor<> should read ColumnMajorArray<>. Reported by - Christian Fuchs . - -2006-01-20 Julian Cummings - - * blitz/array/funcs.h: Moved declaration of binary min/max - functions for blitz Arrays here. Use newly defined Min and Max - applicator objects rather than the old _bz_Min and _bz_Max - applicators from blitz/applics.h. These new applicators are - consistent with the "new" ET style and contain proper support for - the prettyPrint() function. - * blitz/array/ops.h: Moving declaration of binary min/max - functions for blitz Arrays to blitz/array/funcs.h, since these are - functions rather than operators. - * blitz/funcs.h: Added functors Min and Max to implement blitz - min/max binary functions within "new" ET style. 
Somehow the - applicators for these two functions were never translated from the - "old" ET system over to the new one. Using the old applicators - with BZ_DEBUG defined led to compilation errors with the - prettyPrint() function. - -2006-01-13 Julian Cummings - - * blitz/array/storage.h: Modified code for - allRanksStoredAscending() method as proposed by Eddie Breeveld - (Eddie.Breeveld@eu.watsonwyatt.com) to eliminate warnings from the - Microsoft compiler about converting bool to int. - -2006-01-12 Julian Cummings - - * m4/ac_cxx_flags_preset.m4: Removed -D flags for defining special - preprocessor symbols when using XL C++ compiler on Mac OS X or AIX - systems or when using PathScale C++ compiler. No longer needed, - since we now use predefined symbols to detect these compilers. - - * blitz/bzconfig.h: Switched name of preprocessor symbol used to - detect Intel compiler from __ICC to __INTEL_COMPILER. Some - installations of the Intel compiler still define __ECC instead of - __ICC, but using __INTEL_COMPILER appears to work for all Intel - compiler versions. Also switched symbols for XL C++ on Mac OS X - and on AIX and for PathScale C++ compiler to more standard names - already predefined by the compiler. - -2005-12-20 Patrick Guio - - * m4/ac_check_blitz.m4 ac_cxx_lib_blitz.m4: - Replaced macro AC_CXX_LIB_BLITZ by AC_CHECK_BLITZ. - Set AC_SUBST variables BLITZ_CPPFLAGS, BLITZ_LDFLAGS and BLITZ_LIBS - instead of appending to variables CPPFLAGS, LDFLAGS and LIBS. - Added arguments [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]. - * lib/Makefile.am: - Defined libtool variable _LIBADD for any extra libs that might - be necessary to compile with -lblitz (e.g. -lm). - -2005-11-21 Julian Cummings - - * testsuite/contiguous.cpp: Changed fortranArray tag to - columnMajorArray tag, in order to provide quick test of new global - tag for arrays with column major storage. The fortranArray tag is - already exercised elsewhere in the testsuite. 
- * blitz/array/storage.h: Added a global tag columnMajorArray for - aid in creating Arrays with column major storage (but with base - index 0, rather than 1 as with FortranArray). This addition was - suggested by Christian Fuchs (fuchs@lsm.iet.mavt.ethz.ch). - * examples/indirect.cpp: Added quick test of setting array values - to constant using container of ints. Addresses problem noted by - Eddie Breeveld (Eddie.Breeveld@eu.watsonwyatt.com). - * blitz/array/fastiter.h: Added version of moveTo() method for - ArrayFastIterator that takes a single scalar int argument for - completeness, even though this case is presently handled by - implicit conversion of the int argument to a TinyVector. - This version of moveTo() will be invoked by indirection or - stencils involving 1D Arrays. - * blitz/array/expr.h: Added a version of the moveTo() method that - takes a single scalar int argument to class _bz_ArrayExprConstant, - so that expressions like A[I]=0 will work, where A is a 1D Array - and I is a container of ints. The templated version of moveTo() - that takes a TinyVector of ints with arbitrary rank N_rank will - not be instantiated through implicit argument conversion in this - case. I have also modified the moveTo() methods for the other - array expression types (where the precise expression rank is - known) to be non-templated and to expect a TinyVector of ints with - the same rank as the array expression. - -2005-10-31 Julian Cummings - - * blitz/limits-hack.h: Put enclosing parentheses around min and - max to avoid any cpp macro expansion. - * blitz/minmax.h: Put enclosing parentheses around min and max to - avoid any cpp macro expansion. - * blitz/numinquire.h: Put enclosing parentheses around min and max - to avoid any cpp macro expansion. - * blitz/generate/genmatbops.cpp: Put enclosing parentheses around - min and max to avoid any cpp macro expansion. 
- * blitz/generate/genvecbfn.cpp: Put enclosing parentheses around - min and max to avoid any cpp macro expansion. - * blitz/generate/genarrbops.cpp: Put enclosing parentheses around - min and max to avoid any cpp macro expansion. - * blitz/array/resize.cc: Put enclosing parentheses around min and - max to avoid any cpp macro expansion. - * blitz/array/stencils.cc: Put enclosing parentheses around min - and max to avoid any cpp macro expansion. - * blitz/array/stencils.h: Put enclosing parentheses around min and - max to avoid any cpp macro expansion. - * blitz/array/ops.h: Put enclosing parentheses around min and max - to avoid any cpp macro expansion. - * blitz/array/reduce.h: Put enclosing parentheses around min and - max to avoid any cpp macro expansion. - * testsuite/testsuite.h: Include stdlib.h header instead of - assert.h header for declaration of exit(). Added use of - BZ_STD_SCOPE macro around cout and endl, so that this header does - not require a using directive to work properly. - * testsuite/minmax.cpp: Modified test of blitz::minmax functions - to explicitly check that they still work in the presence of cpp - macros min and max. In this case, you have to put an enclosing - set of parentheses around the min or max invocation to prevent the - cpp macro from being substituted. - -2005-10-21 Patrick Guio - - * doc/doxygen/Doxyfile.in: Enabled TREEVIEW for html documentation. - -2005-10-20 Patrick Guio - - * m4/ac_cxx_lib_blitz.m4: Added macro definition HAVE_BLITZ if - compilation test successful. Added support to parse include and library - directories specified independently and as --with-blitz=inc_dir,lib_dir. - -2005-10-19 Julian Cummings - - * examples/erf.cpp: Explicitly include header to get - declaration of non-standard erf(). - * benchmarks/haney.cpp: Add definition of M_PI if not provided by - standard math header. - * blitz/rand-normal.h: Use BZ_MATHFN_SCOPE macro around standard - math functions like sqrt. Removed external include guards. 
Use - standard header if available. Add definition of M_PI if - not provided by math header. - * blitz/benchext.cc: Use BZ_MATHFN_SCOPE around standard math - functions like pow. - * blitz/bench.h: Use BZ_MATHFN_SCOPE macro around standard math - functions like pow. Removed external include guards. Use - standard header if available. - * blitz/array/cgsolve.h: Use BZ_MATHFN_SCOPE macro around standard - math functions like fabs. - * m4/ac_cxx_flags_preset.m4: Added missing debug flag -DBZ_DEBUG - for SunPRO C++ compiler, which was reported by Paul Floyd - (paul.floyd@laposte.net). -2005-10-17 Julian Cummings - - * blitz/tinyvec.cc: Fixed major screwup in blitz 0.9 release. - Changed beginfast to beginFast! -2005-10-14 Julian Cummings - - Tagging repository contents with Blitz_0_9 tag. - * m4/ac_compiler_specific_header.m4: Added support for PathScale - pathCC compiler. - * doc/download.texi: Added reference to the newer blitz project - page on SourceForge. - * doc/help.texi: Updated the Help page to refer to the newer - SourceForge project page and mailing lists for blitz. - * doc/install.texi: Minor updates to the notes on configuration - and installation of blitz. - * doc/platforms.texi: Updates to platform and compiler notes. I - tried to add a comment for every platform/compiler combination - that has active or historical support. - -2005-10-13 Julian Cummings - - * doc/faq.texi: Added a few items that have been asked about a lot - over the past year, including configuration under Mac OS X, the - Fortran compiler requirement, and the need to include the - header. - * doc/arrays-globals.texi: Added information on the newer global - Array functions swap() and find(). - * blitz/array-impl.h: Slight simplification of swap() function. - * configure.ac: Changed blitz version number from 0.8.1 to 0.9 as - we agreed upon. We will reserve use of the patch number only when - providing a single critical patch. 
- * examples/random.cpp: Added a few lines of code to briefly - exercise the new get/set state interface. - * random/mt.h: Added implementation of get/set state for the - MersenneTwister IRNG, as suggested by Patrik Jonsson - (patrik@ucolick.org), with minor corrections and modifications. - * random/default.h: Added interface for getting or setting the - state of the IRNG, as suggested by Patrik Jonsson - (patrik@ucolick.org). - -2005-10-13 Patrick Guio - - * doc/Makefile.am: Changed options to texi2html for compatibility - to both v1.64 and v1.76. - - -2005-10-11 Julian Cummings - - * examples/.cvsignore: Added matlab output files to cvs ignore - list. - * doc/doxygen/.cvsignore: Added doxygen-warning message file to - ignore list. - * .cvsignore: Added older ChangeLog_ files to ignore list. - * blitz/.cvsignore: Oops! I put the ChangeLog_ stuff in the wrong - cvsignore file. - * blitz/.cvsignore: Added old ChangeLog_ files and pathscale - compiler directory. - * blitz/memblock.h: Removed the restrict qualifier from a few - places where it was being used inappropriately with pointers as - function parameters. These were flagged by the IBM xlC compiler. - Basically, you cannot assign a restricted pointer to another - pointer that is in an outer (containing) scope because you may be - creating an alias outside of the current scope. - -2005-10-11 Patrick Guio - - * doc/Makefile.am: Added option to specify the . location for - HTML files generated by texi2html since the newer texi2html v1.76 - provided by Fedora Core 4 creates a subdirectory to put these - files by default. - * doc/random.texi: Fixed @math expression that caused trouble - to newer texi2html v1.76 into conditional expression statements - @iftex/@ifinfo/@ifhtml. - * doc/blitz.texi: Moved texinfo's @setcontentsaftertitlepage - command into @iftex block since it caused trouble to newer - texi2html v1.76. 
- - -2005-10-10 Julian Cummings - - * benchmarks/loopstruct.cpp: Added hack for Compaq cxx compiler, - which does not provide support for writing out the long double - type in std ansi mode. - * blitz/timer.h: Added hack for Compaq cxx compiler, which does - not provide support for writing out the long double type in std - ansi mode. - * doc/doxygen/Makefile.am: Added an uninstall-hook target to clean - up doxygen install dir. - * configure.ac: Added Pathscale compilers to default compiler - search lists. Bumped blitz package version number to 0.8.1. A - few more minor fixes and doc updates before we actually release - the new version, but we're close now! - * blitz/bzconfig.h: Added section for Pathscale compiler-specific - header. - * m4/ac_fortran_flags_preset.m4: Added support for Pathscale - pathf90 compiler. - * m4/ac_cxx_flags_preset.m4: Added support for Pathscale C++ - compiler (pathCC). - * blitz/Makefile.am: Added list of compiler-specific header files - to list of files to be removed by distclean. One - compiler-specific header file will always be present within the - distribution. - * Makefile.am: Added pkgconfig data files to list of files to be - removed by distclean. - -2005-10-07 Julian Cummings - - * blitz/array/newet-macros.h: Greatly simplified the definitions - of the BZ_DECLARE_FUNCTION macros by utilizing the previously - defined macros BZ_DEFINE_UNARY_FUNC, BZ_DEFINE_BINARY_FUNC, etc. - This follows a suggestion from Peter Kummel - (syntheticpp@gmx.net). - * blitz/funcs.h: Added macros BZ_DEFINE_TERNARY_FUNC and - BZ_DEFINE_TERNARY_FUNC_RET for completeness, although there are no - standard library math functions that require such macros. - Reordered to list complex unary funcs after all ordinary unary - funcs and complex binary funcs after all ordinary binary funcs. 
- Renamed macro BZ_DEFINE_BINARY_CFUNC2 to - BZ_DEFINE_BINARY_FUNC_CRET in order to (hopefully) clarify that - this is for a binary func that takes ordinary floating-point args - and returns a complex type. - * blitz/array-impl.h: Added a find() function along the lines of - what was suggested by Jonathan Stickel (jjstickel@sbcglobal.net) - as an analogue to the Matlab find(). This function takes a 1d - Array of TinyVector indices and an Array or _bz_ArrayExpr, and it - stores all the index positions where the Array or expression is - true. Also, I removed the annoying external include guards here - (need to do this universally throughout blitz at some point). - * testsuite/tinyvec.cpp: Added minimal testing of begin()/end() - method and TinyVector iterators. - * m4/ac_cxx_flags_preset.m4: Remove flag that was disabling use of - new type promotion system with icpc compiler. This problem should - now be fixed. - * blitz/promote.h: Substituted for defaultPromotion in expression - for promoteToT1 in order to eliminate an error from the Intel - compiler on this file. The icpc compiler thought the expression - was not a compile-time constant value. This will allow us to once - again use the newer type promotion system with icpc. - -2005-10-06 Theodore Papadopoulo - - * blitz/bzconfig.h: Modify include paths to have the blitz/ - prefix. Use angled brackets instead of quotes around header file - name. Added error message if compiler unknown. - * lib/Makefile.am: Simplified include file search path flags. - * blitz/array/domain.h: Introduces an empty constructor and a - constructor from a vector of Ranges. New typedef used to simplify - the code. Constification of many methods. Added non-const ubound - and lbound methods for RectDomain. Removed external include - guards. - * blitz/array-impl.h: Allow components to be referred with - unsigned. 
- -2005-10-06 Julian Cummings - - * blitz/generate/genpromote.cpp: Eliminate gcc-4.0.0 compiler - warning about using an anonymous struct (thanks Theo!). - * blitz/generate/Makefile.am: Repaired clean-local target. Added - header file dependencies. - * blitz/generate/operands.h: Changed initialization method for - vector types from begin() to beginFast() to avoid conflict with - standard begin() method that is expected to produce an STL - iterator. - * blitz/vecexpr.h: Change begin() to beginFast(). - * blitz/vecpick.cc: Change begin() to beginFast(). - * blitz/vecpick.h: Change begin() to beginFast(). - * blitz/vector.cc: Replaced begin() with beginFast(). - * blitz/vector.h: Changed begin() to beginFast(). Need to add an - STL-compliant iterator for Vector. - * blitz/tinyvec.cc: Replace begin() with beginFast(). - * blitz/tinyvec.h: Renamed pre-existing begin() method beginFast() - to avoid conflict with standard begin() method that is presumed to - deliver an STL-compliant iterator. TinyVectorIter and - TinyVectorIterConst are not STL compliant at all and are really - just glorified indices. Define begin() and end() to return raw - data pointers instead, since these will have the STL iterator - semantics automatically. - -2005-10-03 Julian Cummings - - * README-VS.NET.txt: Updated README file for VS.NET stuff to - mention new Blitz-Examples solution file. -2005-09-29 Theodore Papadopoulo - - * blitz/tinyvec.h: Constified many methods. Added an end() method. - Returning const references instead of copies for operator() const - and operator[] const. - - * blitz/tinyvec.cc: Constified many methods accordingly. - -2005-08-23 Julian Cummings - - * testsuite/complex-test.cpp: Added some tests of math ops with - complex type. - - * blitz/array/ops.h: Added missing support for math operations - between complex Array and scalar types. - -2005-07-27 Julian Cummings - - * blitz/memblock.h: Fixed problem with the deleteDataWhenDone - policy. 
It was not actually deleting the data as expected. - Solution was to set dataBlockAddress_ pointer to the address of - the preexisting memory in MemoryBlock constructor. In subclass - UnownedMemoryBlock constructor, we reset this pointer to 0 to - indicate that any preexisting memory is not our responsibility. - * Makefile.am: Use new FORTRAN_ENABLED conditional to only build - in the benchmarks directory if Fortran compilation is enabled. - * configure.ac: Added conditional to indicate whether Fortran - compilation (needed for the benchmark codes) has been enabled. - * blitz/array/map.h: Added return of dummy T_numtype object in - generic version of map method to eliminate possible compiler - warning about no return statement in non-void function. - -2005-07-07 Julian Cummings - - * blitz/memblock.h: Removed the swap() method that was added here. - The Array method reference() uses the existing - MemoryBlockReference method changeBlock() to implement a swap of - the memory block in a safer manner with reference counting. - * blitz/array-impl.h: Modified Theo's implementation of the swap() - function to use the existing reference() method. This is easier - and does not require the swap() function to be a friend of the - Array class. The MS VS.NET C++ compiler was confused by the - tricky syntax for templated functions that are friended and using - the friend feature of C++ is a bad idea anyway. There may be a - tiny bit more overhead with this implementation because it - constructs a null temporary Array, but the temporary allocates no - space for data elements, only for Array metadata and the Array - ordering description. 
- -2005-06-16 Julian Cummings - - * m4/ac_cxx_template_qualified_base_class.m4: Fixed up the - compiler test for support of template-qualified base classes to - work properly under the new gcc 4.0.0 compiler by adding the full - template specialization syntax and explicitly qualifying the call - to f in the base class as required by the ANSI standard. - -2005-06-16 Theodore Papadopoulo - - * blitz/memblock.h: Added support for swapping memblocks. - * blitz/array-impl.h: Added support for swapping arrays using the - memblock swapping. - -2005-06-02 Julian Cummings - - * blitz/vecmax.cc: Put parentheses around "max" in definitions of - max() function for Vector-like objects, to avoid triggering any - max macro that may be defined. A workaround for sloppiness in - boost and Windows header files. - - * blitz/vecmin.cc: Put parentheses around "min" in definitions of - min() function for Vector-like objects, to avoid triggering any - min macro that may be defined. A workaround for sloppiness in - boost and Windows header files. - -2005-05-24 Julian Cummings - - * blitz/array/ops.h: Use new BZ_DECLARE_ARRAY_ET_BINARY_TINYVEC - macro to declare the standard binary math operations between an - Array-like object and a TinyVector. The TinyVector is treated - like a scalar in these operations, interacting with each element - of the Array separately. - * blitz/array/newet-macros.h: Added macro - BZ_DECLARE_ARRAY_ET_BINARY_TINYVEC to generate code for binary - math operations between any Array-like type and a TinyVector. The - TinyVector is treated as a scalar-like type and the operation is - performed between the TinyVector and each element of the - Array-like object. The definitions for binary operations between - the TinyVector and the Array element type should be provided - elsewhere in . 
- - * blitz/array/iter.h: Modified postfix operator++ for - ConstArrayIterator and ArrayIterator to return the iterator object - by value rather than by const reference, since you are returning a - local copy of the iterator before it was incremented. Some - compilers such as the MS VS.NET 2003 C++ compiler would crash if - you tried to use the const reference that was returned previously. -2005-05-18 Julian Cummings - - * blitz/mathfunc.h: Replaced broken test "#ifdef isnan" with a - test for the preprocessor symbol defined by the new - AC_CXX_ISNAN_IN_NAMESPACE_STD autoconf macro. If the isnan - function is provided in namespace std, we invoke it as std::isnan - in order to avoid problems with using the C99 isnan macro from - within the blitz namespace. - * blitz/funcs.h: Replaced broken test "#ifdef isnan" with a test - for the preprocessor symbol defined by the new - AC_CXX_ISNAN_IN_NAMESPACE_STD autoconf macro. If the isnan - function is provided in namespace std, we invoke it as std::isnan - in order to avoid problems with using the C99 isnan macro from - within the blitz namespace. - * blitz/blitz.h: Only include header explicitly if the - test for standard math functions in the std namespace failed. - Otherwise, it will normally be included implicitly by the - header. - * m4/ac_cxx_math_fn_in_namespace_std.m4: Added an AC_REQUIRE of - AC_CXX_NAMESPACES here, since C++ namespaces are a prerequisite - for this test. - - * m4/ac_cxx_standard_library.m4: Added invocation of new macro - AC_CXX_ISNAN_IN_NAMESPACE_STD that checks if the isnan function is - provided in the std namespace when you include the header. - If it is, we will use std::isnan when calling this function from - within the blitz namespace to avoid problems with the C99 isnan - macro. -2005-05-13 Julian Cummings - - * testsuite/promote.cpp: Used cpp directives to skip some code - that will not work under the old type promotion system. 
- - * m4/ac_cxx_flags_preset.m4: Just discovered that the Intel C++ - compiler is unhappy with the change from unnamed enums to static - const bool. It gives an error on one particular initialization in - . I have reported this to Intel as a bug in the - compiler, since other compilers seem to accept it. As a temporary - solution, I am adding -DBZ_DISABLE_NEW_PROMOTE to the preset flags - for icpc. This will force the usage of the older code in - promote-old.h. This is just to allow the library and testsuite - codes to compile under icpc again. I will undo this change as - soon as we determine what's going wrong here. - - * blitz/promote.h: Some additional cleanup after migrating from - anonymous enum types to static const types. Changed promoteToT1 - from int to bool and simplified expression for promoteToT1. -2005-05-06 Julian Cummings - - * blitz/array-impl.h: Converted most unnamed enums into static const - int or bool types where appropriate, in order to resolve compilation - problems under gcc-4.0.0. This new compiler complains whenever it - sees an unnamed enum being used in an expression where there has been - a templated version of the operator in the expression already declared. - It tries to instantiate the templated operator with the unnamed enum, - which is illegal and results in a compile error. Very annoying! In - any case, I also removed the usages of BZ_ENUM_CAST that were no longer - necessary, since we are now using real types. This is probably a safer - way to write the code anyway... -2005-04-20 Julian Cummings - - * m4/ac_fortran_flags_preset.m4: Added the proper flag for Fortran - external symbol translation when using the Intel Fortran compiler. - Moved the AM_CONDITIONAL for F90_COMPILER into configure.ac so - that it is always defined. - * m4/ac_cxx_flags_preset.m4: Use -ansi flag with Intel compiler - for now. The -strict_ansi flag does not work with the newer gcc - header files, but -ansi is better than nothing. 
Also fixed the - profiling flag for the Intel compiler. - - * configure.ac: Added option --disable-fortran that allows user to - skip the Fortran compiler configuration. With this option, the - benchmarks directory will not be configured. The default is - --enable-fortran. I also moved the C++ compiler configuration - ahead of the Fortran configuration, in case Fortran is skipped. -2005-04-14 Julian Cummings - - * m4/ac_cxx_flags_preset.m4: A few modifications to the GNU and - IBM XL C++ compiler optimization flags. - - * m4/ac_fortran_flags_preset.m4: Added cases to specifically - handle the darwin build target. Added cases for F77=g95, which - can be useful on Darwin platform. Generalized all Fortran - compiler names. A few modifications to the GNU and IBM XL Fortran - compiler optimization options. - - * configure.ac: Added g95 compiler to the Fortran 77 compiler - search list. This is primarily for the benefit of users running - on the Darwin (Mac OS X) platform. These systems often do not - come with the g77 compiler installed, and it can be difficult to - install g77 and g++ at the same version level. Mixing different - versions of g77 and g++ can lead to link-time errors in the blitz - benchmark codes. Using g95 as both the Fortran 77 and Fortran 90 - compiler offers another solution. Note that use of the g95 - compiler under Mac OS X requires cctools-528. Info on how to - install this is at www.g95.org. -2005-04-06 Julian Cummings - - * m4/ac_cxx_flags_preset.m4: Added case for solaris target with CC - compiler (SunPRO). The preset flags have been verified to work - with Sun Studio 10, C++ version 5.7. Also added handling of c++ - compiler name as pseudonym for g++ and reordered the cases - somewhat. I am also removing the -strict_ansi flag with the Intel - C++ compiler for now, since this has been reported to cause - trouble with newer versions of icc and gcc. 
- * m4/ac_compiler_specific_header.m4: Added case for SunPRO CC - compiler (CC on solaris target) and handling of c++ compiler name. - Generalized all compiler names. - * configure.ac: Inserted c++ into C++ compiler search list, so - that Darwin systems (which are case-insensitive) will pick up c++ - (a symbolic link to g++) rather than CC (which is really cc, a - symbolic link to gcc). We need c++ in front of CC in the search list. - * blitz/bzconfig.h: Added case to check for SunPRO CC compiler and - defer to sun/bzconfig.h. - -2005-04-01 Julian Cummings - - * testsuite/reduce.cpp: Added test of count() function. - * blitz/array/expr.h: Use typedef T_expr consistently inside - ArrayExpr class definition. - -2005-02-23 Patrick Guio - - * blitz/numinquire.h: Added config header file so - that numerical inquiries stuff can be used by just including - . - - -2005-02-08 Julian Cummings - - * configure.ac: Added g95 to the list of FC compilers that are - tried and reordered the list to prefer g95 over gfortran. Also, - reordered the compiler macros to do our compiler flag presets - first, before any other macros that may add more flags. - * m4/ac_fortran_flags_preset.m4: On many Linux systems, f77 points - to the g77 compiler. So I've added a test for this possibility, - instead of just assuming that f77 on Linux systems is the Absoft - compiler. If $G77 equals yes, use the g77 flag presets. Added - flag presets for the PGI F77 and F90 compilers, and for the g95 - compiler. Reverted to the practice of overwriting the default - values of FFLAGS and FCFLAGS because the default values are not - appropriate for benchmark codes. - * doc/stencils/Makefile.am: Removed explicit -lm flag from LDADD. - This is now inserted into the LIBS variable automatically if - needed for standard math functions like sin(). - * benchmarks/Makefile.am: Simplified settings for F90 compilation, - taking advantage of the improved F90 support in automake 1.9 using - the FC variables. 
- * m4/ac_lib_blas.m4: Replaced FORTLIBS with standard autoconf - variable FLIBS. - * m4/ac_env.m4: Print out settings of FLIBS and FCLIBS rather than - FORTLIBS, which is now obsolete. - * m4/ac_fortran_flags_preset.m4: Removed settings for FORTLIBS - variable. Fortran library loader flags are now handled using - standard autoconf macros. Separated settings for F77 and FC - variables and only set FC variables if FC has been set. Ensure - that we add to FFLAGS and FCFLAGS rather than overwriting them. - Added gfortran case for F77 and FC on Linux platform. Added g77 - case for all Unix platforms. - * configure.ac: Advanced required automake version number to 1.9, - which has improved support for F90 compilers via the FC variables. - Added gfortran to the list of supported F77 and F90 compilers. - Use standard autoconf macros to determine the Fortran library - flags to pass to the loader, rather than our ad-hoc variable - FORTLIBS. Simplified test for whether FC has been set. Improved - test for standard math functions like sin() so that -lm is only - explicitly included on the link line if necessary. Removed test - for existence of lib directory, since this is now guaranteed. -2005-02-07 Julian Cummings - - * m4/ac_compilers_64bits.m4: Generalized names of a few other - compilers, notably the SGI and IBM compilers. - - * m4/ac_compiler_specific_header.m4: Generalized names of some C++ - compilers. - - * m4/ac_compilers_64bits.m4: Generalized name of g++ compiler in - case statements. - - * m4/ac_cxx_flags_preset.m4: Generalized name of g++ compiler in - case statements. - -2005-01-28 Julian Cummings - - * doc/stencils/Makefile.am: Modified compilation rules to provide - separate -D flag for defining stencilargs symbol. Breaking this - into two separate preprocessor symbols eliminates confusion that - some compilers were having with parentheses embedded in a -D flag. - Should fix problems with pgCC and xlC. 
- * doc/stencils/dump-stencil.cpp: Introduce stencilargs symbol to - represent argument(s) of stenciltoapply. - - * blitz/mathfunc.h: Regenerated using genmathfunc.cpp program to - include PGI compiler hacks. - - * blitz/generate/genmathfunc.cpp: Added some lines to skip - pow(float,float) and all transcendental functions acting on - complex if we are using PGI compiler. PGI does not - support complex math functions because of issues with - how some Linux systems define the long double type. The - pow(float,float) function fails because of the presence of - overloaded versions of pow() taking the combination of a float and - a complex. PGI does not provide an explicit - pow(float,float) overload to call powf(). -2005-01-06 Julian Cummings - - * benchmarks/stencilf2.f: Added explicit declaration for local - loop counter variable. - - * benchmarks/stencilf.f: Added explicit declaration for local loop - counter variables. - - * Makefile.am: Removed unneeded compile rules for suffix ".f90.f" - which is not used. - - -2005-01-07 Patrick Guio - - * blitz/array/ops.h: Completed ET(array,scalar) for scalar - of all integral types. - -2004-11-17 Patrick Guio - - * doc/doxygen/Makefile.am: Fixed install related targets to take into - account a possible DESTDIR. - - -2004-11-04 Julian Cummings - - Tagging repository contents with Blitz_0_8 tag. -2004-11-04 Patrick Guio - - * m4/ac_prog_doxygen.m4: Added test on doxygen version. In order - to be used doxygen must be at least 1.3.x. - - -2004-11-03 Patrick Guio - - * m4/ac_prog_doxygen.m4 doc/doxygen/Makefile.am: Renamed automake - conditional tag 'DOC' into more explicit 'DOXYGEN_DOC' as well as - references. - - * doc/doxygen/Doxyfile.in: Upgraded config file to doxygen 1.3.6. - Replaced ../.. by autoconf variables in INPUT tag. - Removed *.cc from the FILE_PATTERNS tag since they are just - implementation. Limited the class to document by explicitly exclude - concerned files using the EXCLUDE tag. 
Fine tuned some doxygen and - dot behaviour tags. - - * doc/doxygen/Makefile.am: Set docdir to be - $(datadir)/doc/$(PACKAGE)-$(VERSION)/doxygen, indeed a - subdirectory of the blitz root document directory. - Use more systematically available variables. - Cosmetic Changes. - - * TODO: new item: make available RPM as a dist medium. - - -2004-10-29 Julian Cummings - - * doc/doxygen/Makefile.am: Set docdir to - $(datadir)/doc/$(PACKAGE)-doc-$(VERSION) for the doxygen generated - documentation, to distinguish from the texinfo generated docs. - Removed $(VERSION) from names of PS and PDF manuals. There is - still something wrong with the ps and pdf targets. How do you get - doxygen to generate the manuals? Patrick, please fix! - * doc/Makefile.am: Changed docdir to standard value - $(datadir)/doc/$(PACKAGE)-$(VERSION). - - * doc/doxygen/Doxyfile.in: Changed FILE_PATTERNS from *.cpp to - *.cc, since that is the extension used for blitz template - implementation header files. - -2004-10-26 Patrick Guio - - * doc/Makefile.am: Assigned variable docdir the value - $(prefix)/doc/$(PACKAGE) in order to gather blitz documentation in a - subdirectory of the generic doc directory. - * doc/doxygen/Makefile.am: New variable docdir assigned the value - $(prefix)/doc/$(PACKAGE)/doxygen. Used when installing doxygen-generated - documentation in install-data-hook and make-install-dirs targets. - -2004-10-26 Julian Cummings - - * doc/Makefile.am: Added blitz.dvi to EXTRA_DIST, so we don't need - to rebuild it from the blitz.texi file after unpacking the - distribution tarball. - -2004-10-25 Julian Cummings - - * blitz/generate/Makefile.am: Move header-generating programs to - EXTRA_PROGRAMS so they are not built by the default target. They - will only be built if a generated header file needs to be updated. - * blitz/generate/Makefile.am: Made generated header files depend - directly on the source code file needed to generate header, rather - than the executable. 
Modified build rules for generated headers - to build executable first and then run it to create header. This - should allow the tarball distribution to skip generation of header - files if they are already provided in distribution and are - up-to-date. - - * doc/examples/Makefile.am: Made .out output files depend on .cpp - source file rather than executable. Modified build rule for - output files to first build executable, then run it to produce - output file. With these changes, the tarball distribution can use - the given output files without having to rebuild all the - executables. - - * doc/Makefile.am: Added HTML and image files to doc_DATA and to - EXTRA_DIST, so HTML documentation will be installed and included - in distribution tarball. - -2004-10-22 Patrick Guio - - * doc/Makefile.am: Added blitz.info and blitz.pdf in EXTRA_DIST. - Those files ought to be in the dist tarball since they might not be able - to be regenerated due to not available 'tex' and 'makeinfo' commands. - Added blitz.ps and blitz.pdf in doc_DATA since they are now distributed. - * doc/doxygen/Makefile.am: Removed search option '-s' for doxytag and - replaced by tag option '-t' since '-s' is deprecated in recent doxygen - version. - -2004-10-21 Julian Cummings - - * doc/Makefile.am: Added new PDF versions of the figures to - EXTRA_DIST. Moved blitz.ps from doc_DATA to EXTRA_DIST, so that - TeX is not required for installation. - -2004-10-16 Julian Cummings - - * .cvsignore: Removed _configs.sed. This file is no longer - created. - * m4/ax_prefix_config_h.m4: Removed line that copied sed script to - _configs.sed. The sed script is not needed after the - AX_PREFIX_CONFIG_H macro has executed. - -2004-10-15 Julian Cummings - - * doc/Makefile.am: Added doxygen to SUBDIRS list. Added build - rules for blitz_TEXINFOS files in stencils and examples - subdirectories. This is needed so that the top-level dist-gzip - build target can work properly. 
- * doc/platforms.texi: Updated information on various - platforms/compilers. Current info on Intel and GNU compilers. - Added section on Microsoft VS.NET 2003. Noted that support for - Metrowerks and Cray C++ compiler is no longer being maintained. - * doc/install.texi: Updated installation and porting instructions - to refer to current approach using autoconf and configure script. - * doc/version.texi: Updated version info. - * blitz/README: Added note and caveat about the new - compiler-specific header file. - * blitz/Makefile.am: Removed the EXTRA_DIST list, since it - referred to files that are no longer in the distribution. - * configure.ac: Bumped version number to 0.8. Added icc to list - of program names for C++ compiler that we try. This name of the - Intel C++ compiler is now deprecated in favor of the name icpc, - which we try first. - -2004-10-14 Julian Cummings - - * configure.ac: Modification suggested by Derrick Bass - (derrick@caltech.edu) that disables use of the F90 compiler if it - does not support free format source code or source code files with - a .f90 extension. - * testsuite/newet.cpp: Fixed typo reported by Derrick Bass - (derrick@caltech.edu). - -2004-10-14 Patrick Guio - - * config/texinfo.tex: Removed from repository since it is generated - when running 'autoreconf -vif'. - - * doc/blitz.info: Removed from repository since it is generated when - making doc. - - -2004-10-13 Patrick Guio - - * m4/ac_compiler_specific_header.m4: New macro "extracted" from - configure.ac to set the compiler-specific header. Added - COMPILER_SPECIFIC_HEADER as a precious variable. Added icc as a - supported compiler (together with icpc). - - * configure.ac: Moved compiler specific-header handling in - m4/ac_compiler_specific_header.m4. 
- - * blitz/config-ICL.h blitz/config-KCC.h blitz/config-SC4.0.h - blitz/config-SGi.h blitz/config-g++2.7.2.h blitz/config-g++3.h - blitz/config-mwerks.h blitz/config-xlC.h: Removed as rendered - obsolete due to the new form of preprocessor symbol BZ_HAVE_*. - - -2004-10-06 Julian Cummings - - * blitz/Makefile.am: Modified to install the compiler-specific - header file that was created. Added definition of EXTRA_HEADERS - with list of all possible header files of this sort. Seems to be - required by automake. - - * configure.ac: Modified compiler-specific header file logic to - define the name of the header file that is produced as - COMPILER_SPECIFIC_HEADER. We need this in order to tell the blitz - subdirectory Makefile which header to install. - * random/mt.h: Changed BZ_NAMESPACES to BZ_HAVE_NAMESPACES. - * examples/cfd.cpp: Changed BZ_NAMESPACES to BZ_HAVE_NAMESPACES. - * examples/simple.cpp: Use blitz macro for using directive. - * examples/slicing.cpp: Use blitz macro for using directive. - Changed BZ_PARTIAL_ORDERING to BZ_HAVE_PARTIAL_ORDERING. - * examples/tiny.cpp: Use blitz macro for using directive. - * examples/transform.cpp: Use blitz macro for using directive. - * examples/where.cpp: Use blitz macro for using directive. - * testsuite/complex-test.cpp: Changed BZ_HAVE_COMPLEX_MATH to - BZ_HAVE_COMPLEX_FCNS. - - * testsuite/promote.cpp: Changed to new form of preprocessor - symbols. - - * blitz/generate/genvecuops.cpp: Changed to new form of - preprocessor symbol for complex methods. - - * blitz/generate/genmathfunc.cpp: Changed one more instance of - BZ_HAVE_COMPLEX_MATH to BZ_HAVE_COMPLEX. - - * blitz/array/newet-macros.h: Changed preprocessor symbols to new - form. - - * blitz/array/multi.h: Changed BZ_HAVE_COMPLEX_MATH to - BZ_HAVE_COMPLEX. - - * blitz/array/funcs.h: Changed BZ_HAVE_COMPLEX_MATH to - BZ_HAVE_COMPLEX_FCNS. - - * blitz/array/asexpr.h: Changed preprocessor symbols to new form. 
- * blitz/promote.h: Changed BZ_ preprocessor symbols to new form. - * blitz/numtrait.h: Changed BZ_BOOL to BZ_HAVE_BOOL. - * blitz/generate/genmathfunc.cpp: Fixed up usage of various - preprocessor symbols related to complex type and math functions. - * blitz/funcs.h: Fixed up usage of various preprocessor symbols - related to complex type and math functions. - * blitz/array-impl.h: Changed BZ_PARTIAL_ORDERING to - BZ_HAVE_PARTIAL_ORDERING. - - * blitz/bzdebug.h: Changed BZ_RTTI to BZ_HAVE_RTTI and added use - of BZ_HAVE_STDLIB_H. - - * blitz/Makefile.am: Replaced config.h with bzconfig.h in - blitz_HEADERS list. - - * blitz/compiler.h: Use new master file to - include a compiler-specific config file. Use new version of - preprocessor symbols defined by autoconf macros. - * m4/ac_check_cxx_features.m4: Removed superfluous include guard - insertion into config.h file. Autoconf already adds such an - include guard. - - * m4/ac_cxx_general.m4: Changed AC_CXX_NAMESPACE to standard - AC_CXX_NAMESPACES. - - * m4/ac_cxx_standard_library.m4: Replacing autoconf macro - AC_CXX_HAVE_COMPLEX_MATH with AC_CXX_HAVE_COMPLEX_FCNS and two - macros from the GNU autoconf macro archive, - AC_CXX_HAVE_COMPLEX_MATH1 and AC_CXX_HAVE_COMPLEX_MATH2. The - AC_CXX_HAVE_COMPLEX_FCNS macro tests only for the standard methods - of the complex class such as real() and conj(). The math - function tests are now handled by the standard macros from the GNU - autoconf macro archive. - * m4/ac_cxx_type_promotion.m4: Added a real program to properly - test for the vector type promotion mechanism. Modified to follow - format of m4 macros in GNU autoconf macro archive. Defines - HAVE_TYPE_PROMOTION instead of BZ_HAVE_TYPE_PROMOTION. The BZ_ - prefix will be added using the AX_PREFIX_CONFIG_H autoconf macro. - - * m4/ac_cxx_nceg_restrict_egcs.m4: Modified to follow format of m4 - macros in GNU autoconf macro archive. 
The BZ_ prefix will be - added to the defined preprocessor symbol using the - AX_PREFIX_CONFIG_H autoconf macro. - - * m4/ac_cxx_math_fn_in_namespace_std.m4: Modified to follow format - of m4 macros in GNU autoconf macro archive. The BZ_ prefix will - be added to the defined preprocessor symbol using the - AX_PREFIX_CONFIG_H autoconf macro. - - * m4/ac_cxx_have_rusage.m4: Modified to follow format of m4 macros - in GNU autoconf macro archive. Defines HAVE_RUSAGE instead of - BZ_HAVE_RUSAGE. The BZ_ prefix will be added using the - AX_PREFIX_CONFIG_H autoconf macro. - - * m4/ac_cxx_have_climits.m4: Modified to follow format of m4 - macros in GNU autoconf macro archive. Defines HAVE_CLIMITS - instead of BZ_HAVE_CLIMITS. The BZ_ prefix will be added using - the AX_PREFIX_CONFIG_H autoconf macro. - - * m4/ac_cxx_flags_preset.m4: Added default flag settings for xlc++ - compiler, which is a version of the IBM C++ compiler made for - Darwin systems. Added -D flags for the IBM and Fujitsu compilers - so we can detect use of these compilers at compile time. Most - other C++ compilers already define a distinctive preprocessor - symbol. - -2004-10-05 Julian Cummings - - * compiler/promote.cpp: This file contained an empty program, so I - wrote an actual test for the vector type promotion mechanism using - traits, as described in "Using C++ Traits Classes for Scientific - Computing" by Todd Veldhuizen (1996). -2004-10-01 Julian Cummings - - * blitz/array/indirect.h: Changes corresponding to those recently - made in cartesian.h and eval.cc. Extended indexSet() method to - accept up to 11 parameters. Replaced data[i] with *data++ in - expression evaluation loops using unit stride. - -2004-09-17 Julian Cummings - - * manual/Makefile.am: Corrected name of arrays-types.yo file. - Added README file to EXTRA_DIST. Moved all of the .yo files and - the helper files used to create the old blitz manual from doc_DATA - to EXTRA_DIST. 
We install only the old blitz manual itself from - this directory. It is no longer maintained. - - * blitz/array/eval.cc: Replaced data[i] with *data++ in unit - stride loops over data inside various Array expression evaluation - mechanisms. This tends to optimize better under some compilers, - and this change eliminates some unusual bugs in expression - evaluation when using the KCC compiler in optimized mode. This is - an old patch which I forgot to check in. - * blitz/array/resize.cc: Changed precondition on resize() method - to allow extent arguments with the value 0. This will allow the - user to resize an existing Array to size 0, which will cause the - MemoryBlockReference class to call the changeToNullBlock() method - and release its reference to any pre-existing allocated memory - block. We continue to reject an extent of 0 in the - resizeAndPreserve() method, since presumably the user wishes to - preserve some pre-existing elements. - * blitz/array/slicing.cc: Use default value of zero for second - argument in call to changeBlock method of class - MemoryBlockReference. No need to set data_ pointer to - array.dataZero(), as this is already done by changeBlock. - * blitz/array/methods.cc: Use default value of zero for second - argument in call to changeBlock method of class - MemoryBlockReference. Removed commented out line setting - data_=array.data_, as this is now already taken care of by - changeBlock. When constructing an Array, only allocate new memory - block if numElements>0. If numElements==0, call - changeToNullBlock. This ensures that the begin() and end() - iterators are equal for an Array with size 0. - * blitz/vector.cc: Use default value of zero for second argument - in call to changeBlock method of class MemoryBlockReference. 
- * blitz/memblock.h: Reset data_ pointer equal to data_ pointer of - original MemoryBlockReference rather than data_ pointer of - MemoryBlock itself when constructing a new MemoryBlockReference - from an old one or changing the MemoryBlock that it refers to. - The MemoryBlock data_ pointer points to the beginning of the - usable portion of the allocated memory block, whereas the data_ - pointer for a MemoryBlockReference points to the zeroth element of - the Array allocated in this MemoryBlock. Note that the zeroth - element may not be within the valid range of the Array and the - MemoryBlockReference data_ pointer may point to an address outside - of the memory block. I've given the offset parameter for the - changeBlock method a default value of zero. Any offset value - given will indicate an offset in memory between the zeroth element - of the original Array and the modified or new Array. Normally, we - can just accept the default offset value of zero and then adjust - the position of the Array data_ pointer afterwards if needed. -2004-08-24 Julian Cummings - - * blitz/array/methods.cc: Commented out an unnecessary line of - code in Array::reference() method. - - * blitz/array/slicing.cc: Modified slice() method to flip the - ascendingFlag inside the Array storage if slicing with a backwards - Range. Factored expression for computing offset of data_ pointer. - Corrected sign error in adjustment of zeroOffset_ value. It - should move in the same direction as the data_ pointer. -2004-07-28 Theodore Papadopoulo - - * blitz/array-impl.h: Made the return type of operator()(XXX) const - be const references instead of values. Added constant case for - multicomponent_traits. - -2004-06-23 Julian Cummings - - * m4/ac_fortran_flags_preset.m4: Tuned up some of the optimization - flags and cleaned up file formatting. Allow modified forms of - compiler name for KCC, SGI and IBM compilers, so we can use - special scripts to invoke these compilers (e.g., mpxlC or newKCC). 
- - * m4/ac_cxx_flags_preset.m4: Tuned up some of the optimization - flags and cleaned up file formatting. Allow modified forms of - compiler name for KCC, SGI and IBM compilers, so we can use - special scripts to invoke these compilers (e.g., mpxlC or newKCC). - - * m4/ac_compilers_64bits.m4: Added special 64-bit flags for the - archiver and loader as needed. Allow modified forms of compiler - name for KCC, SGI and IBM compilers, so we can use special scripts - to invoke these compilers (e.g., mpxlC or newKCC). - - * configure.in: Moved test for 64-bit option ahead of libtool - configuration, so that libtool will pick up the proper 64-bit - flags for the archiver tool, if requested. - -2004-06-18 Julian Cummings - - * testsuite/iter.cpp: Added testing of standard iterator interface - for blitz Array iterator types and run a couple of STL algorithms - using Array iterators if BZ_HAVE_STL is defined. - * blitz/array-impl.h: Eliminated the use of the _bz_endTag struct - to request an end iterator for an Array. Just use the default - iterator constructor to do this. - * blitz/array/iter.h: Modified behavior of ArrayIterator and - ConstArrayIterator to be more STL compliant. Specifically, the - postfix operator++ now returns a const reference to the iterator - object as it was before the increment. Users should expect the - postfix operator++ to be slower than prefix operator++ and code - accordingly. If BZ_HAVE_STL is defined, we provide - specializations of the struct std::iterator_traits<> to provide - the standard interface for the Array iterator types. - - * INSTALL: Updated to describe present configuration procedure - using standardized autoconf configure script. Added note on - running autoreconf when obtaining blitz from the cvs repository. - * README: Added information about the new Blitz++ website hosted - by SourceForge. Updated the listing and explanation of the - directory structure. 
- -2004-06-15 Julian Cummings - - * blitz/array/reduce.h: Added new private method computeOrdering() - to class _bz_ArrayExprReduce that properly computes the ordering - values for each dimension of the reduction expression and stores - them in a data member. The computeOrdering() method checks that - ordering values from the expression to be reduced are not invalid - and are unique. This is similar to a patch submitted by Derrick - Bass for the code that constructs an Array from a _bz_ArrayExpr. - I have also eliminated the use of "rank" as a local variable name - to avoid possible confusion with the _bz_ArrayExprReduce enum of - the same name. - -2004-06-04 Julian Cummings - - * blitz/array/stencilops.h: Added missing typename keyword. - * blitz/array/stencils.h: Added blitz:: qualifier to use of - TinyVector type. - -2004-05-06 Theodore Papadopoulo - - * configure.in: Do not execute the AC_FC_FREEFORM and AC_FC_SRCEXT - macros if no fortran 90 compiler was found. - - * benchmarks/Makefile.am: search source files in $(srcdir) for - compilation speed tests. - -2004-05-05 Julian Cummings - - * lib/Makefile.am: Restored -I$(top_builddir) flag needed to find - generated config.h file when building in a separate directory. - * testsuite/Makefile.am: Restored -I$(top_builddir) flag needed to - find generated config.h file when building in a separate - directory. - * examples/Makefile.am: Restored -I$(top_builddir) flag needed to - find generated config.h file when building in a separate - directory. - * benchmarks/Makefile.am: Restored -I$(top_builddir) flag needed - to find generated config.h file when building in a separate - directory. - * doc/examples/Makefile.am: Restored -I$(top_builddir) flag needed - to find generated config.h file when building in a separate - directory. - * doc/stencils/Makefile.am: Restored -I$(top_builddir) flag needed - to find generated config.h file when building in a separate - directory. 
Also fixed problem with building dump-stencil - executable in wrong directory. I did this by explicitly creating - dump-stencil in the current (build) directory. - * manual/examples/Makefile.am: Restored -I$(top_builddir) flag - needed to find generated config.h file when building in a separate - directory. - * manual/stencils/Makefile.am: Restored -I$(top_builddir) flag - needed to find generated config.h file when building in a separate - directory. - - * benchmarks/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. Cleaned up LIBS list. - * examples/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. - * testsuite/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. - * doc/stencils/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. Removed use of deprecated INCLUDES variable. - * doc/examples/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. - * manual/examples/debug.cpp: Restored indexing bug, which is - apparently there for illustrative purposes. - * doc/examples/debug.cpp: Restored indexing bug, which is - apparently there for illustrative purposes. - * doc/examples/debug.cpp: Fixed indexing bug. - * doc/examples/cast.cpp: Cosmetic change. - * manual/stencils/Makefile.am: Moved -I flags from AM_CXXFLAGS to - AM_CPPFLAGS. - * manual/examples/Makefile.am: Activated several example codes - that had been commented out. Cleaned up list of extra files. - Moved -I flags from AM_CXXFLAGS to AM_CPPFLAGS. - * manual/examples/debug.cpp: Fixed indexing bug in this example, - just so it will run without crashing. Not sure what this code is - supposed to illustrate. - * manual/examples/cast.cpp: Corrected this example code to use - new-style blitz cast syntax. - * lib/Makefile.am: Moved -I flags from AM_CXXFLAGS to AM_CPPFLAGS. - * Makefile.am: Removed src from list of SUBDIRS because there is - nothing to do there. - * configure.in: Replaced deprecated AC_LANG_CXX and AC_LANG_F77 - with calls to AC_LANG. 
Reordered AC_LANG calls so we don't need - to call AC_LANG_CXX twice. Removed some superfluous m4 macro - argument quoting. Removed compiler and src subdirectories from - list of directories to configure. The compiler area is obsolete - now that autoconf does this work for us. We don't need to visit - the src directory, since the blitz library is built from the lib - directory. - -2004-04-25 Patrick Guio - - * m4/ac_cxx_flags_preset.m4: Fixed initialisation and test of ac_cxx_flags_preset variable. - -2004-04-17 Patrick Guio - - * m4/ax_dirname.m4 m4/ax_create_pkgconfig_info.m4: New files. Macros to generate - necessary files to use in conjunction with pkg-config tool (see - http://www.freedesktop.org/Software/pkgconfig). Files copied from - http://ac-archive.sourceforge.net. - * m4/ac_cxx_lib_blitz.m4: New file. Macro to check whether Blitz is installed on the - system. Useful when writing configure file for package using Blitz. - * configure.in Makefile.am: Added support for generating .pc files to use in conjunction - with pkg-config. - -2004-04-09 Julian Cummings - - * benchmarks/Makefile.am: Added AM_CXXFLAGS to compile command for - compile-time benchmarks. - - * configure.in: Reordered lists of C++ and Fortran compilers we - search for to avoid common conflicts on systems that might have - more than one compiler set installed. - - * m4/ac_cxx_flags_preset.m4: Minor formatting change. - * m4/ac_cxx_flags_preset.m4: Changed use of "match" operator - within expr commands to more standard ":" operator, since "match" - is not recognized by the implementation of expr on Darwin systems. - * lib/Makefile.am: Removed CXXFLAGS from AM_CXXFLAGS, since this - gets added separately to the compile command already. - * testsuite/Makefile.am: Removed CXXFLAGS from AM_CXXFLAGS, since - this gets added separately to the compile command already. - * examples/Makefile.am: Removed CXXFLAGS from AM_CXXFLAGS, since - this gets added separately to the compile command already. 
- * benchmarks/Makefile.am: Removed CXXFLAGS from AM_CXXFLAGS, since - this gets added separately to the compile command already. - * doc/examples/Makefile.am: Removed CXXFLAGS from AM_CXXFLAGS, - since this gets added separately to the compile command already. - * doc/stencils/Makefile.am: Use AM_CXXFLAGS macro for -I flags - rather than deprecated INCLUDES macro. - -2004-04-07 Julian Cummings - - * blitz/Makefile.am: Removed new-config.h from list of - blitz_HEADERS. - - * blitz/compiler.h: Changed <blitz/new-config.h> back to - <blitz/config.h>. - - * Makefile.am: Removed compiler from list of SUBDIRS and deleted - the check-compiler target. This work is now done by autoconf and - the configure script, so we don't need to do it again. - - * configure.in: Bumped AC_PREREQ value to 2.59. Changed - blitz/new-config.h back to standard name blitz/config.h. This - will now replace the old file that was generated with the bzconfig - script. Added nostdinc to list of automake options to prevent - automake from adding -I../blitz to set of default include flags. - This causes problems with Compaq cxx and is unnecessary because - blitz header files are always included as anyway. - * examples/random.cpp: Added workaround for problem with streams - when using Compaq cxx compiler. Sending a long double to an - ostream causes a core dump, so cast to double before sending - value. - - * examples/rand2.cpp: Added workaround for problem with streams - when using Compaq cxx compiler. Sending a long double to an - ostream causes a core dump, so cast to double before sending - value. - - * lib/Makefile.am: Corrected setting of include flags. - * testsuite/Makefile.am: Corrected setting of include flags. - * examples/Makefile.am: Corrected setting of include flags. - * benchmarks/Makefile.am: Corrected setting of include flags. - * doc/examples/Makefile.am: Corrected setting of include flags. - * doc/stencils/Makefile.am: Corrected setting of include flags. 
- * blitz/generate/Makefile.am: Removed incorrect and unneeded - definition of generatedir. - - * m4/ac_cxx_flags_preset.m4: Modified default flags for Compaq cxx - compiler to allow use of restrict keyword and simplified the set - of optimization flags. - -2004-03-24 Patrick Guio - - * doc/doxygen/Makefile.am doc/doxygen/Doxyfile.in: New files. Handles doxygen - documentation. Mostly "cut and paste" from tvmet project (tvmet.sourceforge.net). - * configure.in: Added generation of doc/doxygen/Doxyfile from - doc/doxygen/Doxyfile.in and doc/doxygen/Makefile. - * doc/doxygen/blitz.doxygen: Removed. Replaced by doc/doxygen/Doxyfile. - - -2004-03-22 Patrick Guio - - * m4: New directory containing m4 macros files that replace acinclude.m4. - * m4/ac_check_cxx_features.m4 m4/ac_compilers_64bits.m4 m4/ac_cxx_bool.m4 - m4/ac_cxx_complex_math_in_namespace_std.m4 m4/ac_cxx_const_cast.m4 - m4/ac_cxx_default_template_parameters.m4 m4/ac_cxx_dynamic_cast.m4 - m4/ac_cxx_enable_debug.m4 m4/ac_cxx_enable_optimize.m4 - m4/ac_cxx_enum_computations.m4 m4/ac_cxx_enum_computations_with_cast.m4 - m4/ac_cxx_exceptions.m4 m4/ac_cxx_explicit.m4 - m4/ac_cxx_explicit_template_function_qualification.m4 m4/ac_cxx_flags_preset.m4 - m4/ac_cxx_full_specialization_syntax.m4 m4/ac_cxx_function_nontype_parameters.m4 - m4/ac_cxx_general.m4 m4/ac_cxx_have_climits.m4 m4/ac_cxx_have_complex.m4 - m4/ac_cxx_have_complex_math.m4 m4/ac_cxx_have_ieee_math.m4 - m4/ac_cxx_have_numeric_limits.m4 m4/ac_cxx_have_rusage.m4 m4/ac_cxx_have_std.m4 - m4/ac_cxx_have_stl.m4 m4/ac_cxx_have_system_v_math.m4 m4/ac_cxx_have_valarray.m4 - m4/ac_cxx_keywords.m4 m4/ac_cxx_math_fn_in_namespace_std.m4 - m4/ac_cxx_member_constants.m4 m4/ac_cxx_member_templates.m4 - m4/ac_cxx_member_templates_outside_class.m4 m4/ac_cxx_mutable.m4 - m4/ac_cxx_namespace.m4 m4/ac_cxx_nceg_restrict.m4 m4/ac_cxx_nceg_restrict_egcs.m4 - m4/ac_cxx_old_scoping.m4 m4/ac_cxx_partial_ordering.m4 - m4/ac_cxx_partial_specialization.m4 
m4/ac_cxx_reinterpret_cast.m4 - m4/ac_cxx_rtti.m4 m4/ac_cxx_standard_library.m4 m4/ac_cxx_static_cast.m4 - m4/ac_cxx_template_keyword_qualifier.m4 - m4/ac_cxx_template_qualified_return_type.m4 - m4/ac_cxx_template_qualifier_base_class.m4 - m4/ac_cxx_template_scoped_argument_matching.m4 m4/ac_cxx_templates.m4 - m4/ac_cxx_templates_as_template_arguments.m4 m4/ac_cxx_templates_features.m4 - m4/ac_cxx_type_casts.m4 m4/ac_cxx_type_promotion.m4 m4/ac_cxx_typename.m4 - m4/ac_cxx_use_numtrait.m4 m4/ac_env.m4 m4/ac_fortran_flags_preset.m4 m4/ac_info.m4 - m4/ac_lib_blas.m4 m4/ac_prog_doxygen.m4 m4/ac_send_config.m4: New files. - M4 macros containing functions definitions for the configure.in autoconf file. - * acinclude.m4: Removed. Renamed m4/blitz.m4. - * m4/blitz.m4: New file. Old acinclude.m4. - * Makefile.am configure.in: Updated to use the new m4 macros in m4 directory. - configure.in now integrates all the C++ feature tests handled earlier by - compiler/bzconfig. - It creates the file blitz/new-config.h from the template file - blitz/new-config.h.in. blitz/new-config.h replaces blitz/config.h earlier - generated by compiler/bzconfig. - The --with-cxx option is removed and replaced by standard variable parsing. - The C++ compiler flags preset can be overwritten with the new option - --disable-cxx-flags-preset. - Fortran 77/90 compilers and flags can be overwritten with the new option - --disable-fortran-flags-preset. - For more help check configure --help. - * blitz/compiler.h: Include blitz/new-config.h instead of blitz/config.h. - * blitz/Makefile.am: Added new-config.h as a file to install. - * blitz/benchext.cc blitz/benchext.h: Added optional argument for - saveMatlabGraph to be able to plot 2d graph with requested Matlab command (other - type than the earlier default semilogx). - * benchmarks/Makefile.am: Cleaned the Fortran 77/90 variables definitions. - Make use of the new FC* variables system introduced in configure.in. 
- * benchmarks/loop25.cpp: Corrected number of arithmetic operations to be 6. - * benchmarks/plot_benchmarks.m.in: Added information about compiler set up and - flags in the front page. - * benchmarks/stencil.cpp: Replaced Matlab semilogx command by a linear - plot command. - * config: New directory containing generated configuration files. - * config/texinfo.tex config/mdate-sh: New file. Replace doc/texinfo.tex - doc/mdate-sh. - * doc/texinfo.tex doc/mdate-sh: Removed. Replaced by config/texinfo.tex - config/mdate-sh. - * m4/ac_cxx_flags_preset.m4 m4/ac_fortran_flags_preset.m4: Fixed - unsupported syntax variable assignment of several strings on several - lines to one long string. - * benchmarks/Makefile.am: Fixed clean-local target core* into core.[0-9]*. - * m4/ac_cxx_flags_preset.m4: Fixed CXXFLAGS flag for Compaq cxx ver.>=6.3. - Actually tested just for cxx ver. 6.5. - * Makefile.am: Added so_locations cxx_repository in the clean-local target. - -2004-03-16 Julian Cummings - - * blitz/blitz.h: Added C++ tag for Emacs. I am testing the cvs - commit notification system. - -2004-03-15 Patrick Guio - - * configure.in: Replaced "echo" commands by autoconf AC_MSG_NOTICE calls. - * acinclude.m4: Quote-protected function definition names. - -2004-03-09 Julian Cummings - - * blitz/prettyprint.h: Eliminated compiler warnings by adding - explicit casts in nextArrayOperandSymbol and - nextScalarOperandSymbol methods. - * blitz/mathfunc.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. Regenerated from - genmathfunc.cpp code. - * blitz/generate/genmathfunc.cpp: Use BZ_STD_SCOPE macro to insert - std:: qualifier in front of string type where needed. - * blitz/funcs.h: Use BZ_STD_SCOPE macro to insert std:: qualifier - in front of string type where needed. - * blitz/indexexpr.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. 
- * blitz/mathf2.h: Use BZ_STD_SCOPE macro to insert std:: qualifier - in front of string type where needed. - * blitz/ops.h: Use BZ_STD_SCOPE macro to insert std:: qualifier in - front of string type where needed. - * blitz/update.h: Use BZ_STD_SCOPE macro to insert std:: qualifier - in front of string type where needed. - * blitz/array/expr.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/fastiter.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/functorExpr.h: Use BZ_STD_SCOPE macro to insert - std:: qualifier in front of string type where needed. - * blitz/array/map.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/newet-macros.h: Use BZ_STD_SCOPE macro to insert - std:: qualifier in front of string type where needed. - * blitz/array/reduce.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/stencil-et.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/where.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/zip.h: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/eval.cc: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - * blitz/array/reduce.cc: Use BZ_STD_SCOPE macro to insert std:: - qualifier in front of string type where needed. - -2004-03-01 Julian Cummings - - * doc/examples/Makefile.am: Added $(EXEEXT) to program names when - listed as prerequisites for .out files, so these executables get - built properly on platforms where executables do have a filename - extension. 
- -2004-02-09 Julian Cummings - - * blitz/array/newet-macros.h: Added new macro - BZ_DECLARE_FUNCTION2_SCALAR, which allows the user to declare that - a user-defined binary function may be applied to a combination of - an Array type and the given scalar type. This macro is intended - to be used in conjunction with either BZ_DECLARE_FUNCTION2 or - BZ_DECLARE_FUNCTION2_RET, which declare a user-defined binary - function applicable to a pair of Array types. The arguments for - BZ_DECLARE_FUNCTION2_SCALAR are the function name and the scalar - argument type. - -2004-02-05 Julian Cummings - - * blitz/array/newet-macros.h: Added space between macro argument - "sca" and closing angled bracket ">" to accommodate the use of a - scalar type that is a template, such as complex. This - eliminates errors reported by the Intel icc compiler. - -2004-01-20 Julian Cummings - - * blitz/array/ops.cc: Removed inline keyword from - Array::initialize() member function, since this triggers a bug in - the gcc optimizer when repeatedly assigning a scalar to an Array - slice. - * blitz/memblock.cc: Changed argument type for member function - allocate() from int to size_t and made type usage consistent - throughout this function. - - * blitz/memblock.h: Changed argument type for member function - allocate() from int to size_t to eliminate unintended type - conversion. Also fixed internal include guard, removed external - include guards and added Emacs C++ tag. - - * blitz/vector.h: Consolidated multiple versions of copy - constructor with const and non-const reference argument into a - single version to eliminate compiler warning. Also eliminated - external include guards and added Emacs C++ tag. - -2004-01-08 Julian Cummings - - * examples/prettyprint.cpp: Replaced use of obsolete macro kludge - _bz_true with ANSI C++ boolean value true. - -2004-01-06 Julian Cummings - - * blitz/array/iter.h: Fixed bug in ConstPointerStack assignment - operator. 
- -2003-12-30 Julian Cummings - - * testsuite/extract.cpp: Use new BZ_DECLARE_ARRAY_ET_SCALAR_OPS - macro to provide binary operators that combine an Array and an - RGB24 object. This is needed here to provide the definition for - operator==. This is an example of how to place a user-defined - concrete data type into the blitz ET system. - - * testsuite/newet.cpp: Removed the final test in this testsuite - code, which was attempting to invoke a ternary functor with one of - the arguments being a scalar int. This no longer works because - the ternary operator definition has been modified to accept only - Array-like ETBase objects. This seems like a more reasonable - behavior. One could define ternary combinations that involve one - or two scalar objects and convert them into _bz_ArrayExprConstant - objects, but I'm not sure there is any real need for such - operators. - - * blitz/array/funcs.h: Added new macro - BZ_DECLARE_ARRAY_ET_SCALAR_FUNCS that provides binary functions - for combinations of an ETBase and a scalar type. This macro is - used to provide binary funcs for Array-like objects interacting - with all the common built-in scalar types. It can also be used to - define similar functions with a user-defined scalar type. - - * blitz/array/ops.h: Added new macro - BZ_DECLARE_ARRAY_ET_SCALAR_OPS that provides binary operators for - combinations of an ETBase and a scalar type. This macro is - used to provide binary ops for Array-like objects interacting with - all the common built-in scalar types. It can also be used to - define similar operators with a user-defined scalar type. - - * blitz/array/zip.h: Renamed class _bz_ArrayExprOp to - _bz_ArrayExprBinaryOp for imporoved clarity. - - * blitz/array/newet-macros.h: Renamed class _bz_ArrayExprOp to - _bz_ArrayExprBinaryOp for imporoved clarity. Reduced the - complexity of the BZ_DECLARE_ARRAY_ET_BINARY macro. 
This macro - now applies only to the combination of an ETBase and an - ETBase, so the operands must be Arrays or Array - expression-like objects. Interactions between an ETBase and a - scalar type are handled by a new macro called - BZ_DECLARE_ARRAY_ET_BINARY_SCALAR, which converts the scalar into - an expression object using the asExpr class. The advantage of - this approach is that we have fewer combinations to handle and we - can avoid ambiguities that might arise with general function - templates. - - * blitz/array/asexpr.h: Renamed class _bz_ArrayExprOp to - _bz_ArrayExprBinaryOp for imporoved clarity. Added static method - getExpr() to asExpr class to convert the given type into the - T_expr type needed for expression evaluation. This allows us to - reduce the number of specializations of binary operators by - treating all ETBase objects in a uniform fashion. - - * blitz/array/expr.h: Renamed class _bz_ArrayExprOp to - _bz_ArrayExprBinaryOp for imporoved clarity. Some other cosmetic - changes as well. - - * blitz/array/bops.cc: Regenerated blitz/array/bops.cc header - file. - - * blitz/array/uops.cc: Regenerated blitz/array/uops.cc header - file. - - * blitz/generate/genarruops.cpp: Renamed _bz_ArrayExprOp class to - _bz_ArrayExprBinaryOp for improved clarity. - - * blitz/generate/genarrbops.cpp: Renamed _bz_ArrayExprOp class to - _bz_ArrayExprBinaryOp for improved clarity. - - * blitz/array/where.h: Removed some previously commented out code. - Added Emacs C++ tag. - * blitz/array/ops.cc: Replaced static_cast of ETBase objects with - call to unwrap() member function. Also removed external include - guards. - - * blitz/array/newet-macros.h: Replaced static_cast of ETBase - objects with call to unwrap() member function. Also added Emacs - C++ tag at top of file. - - * blitz/array/functorExpr.h: Replaced static_cast of ETBase - objects with call to unwrap() member function. Also added Emacs - C++ tag and Blitz file header at top of file. 
- - * blitz/array/funcs.h: Replaced static_cast of ETBase objects with - call to unwrap() member function. Also added Emacs C++ tag at top - of file and removed external include guards. - - * blitz/etbase.h: Added unwrap() member function to ETBase class - that performs the static cast to produce the wrapped type. Also - added Emacs C++ tag at top of file. - -2003-12-17 Julian Cummings - - * doc/blitz.texi: Patch from Peter Bienstman to add directory info - to this file, so it can be incorporated in a global texinfo index. - -2003-12-17 Patrick Guio - * doc/stencils/Makefile.am: Updated to make full use of libtool to - compile stencil generator. - -2003-12-16 Julian Cummings - - * testsuite/newet.cpp: Added some testing of support for - user-defined ternary functions. - - * blitz/array/Makefile.am: Removed newet-macros-old.h from - array_HEADERS list. - - * blitz/array/newet-macros.h: Added macros - BZ_DECLARE_ARRAY_ET_TERNARY, BZ_DECLARE_FUNCTION3, and - BZ_DECLARE_FUNCTION3_RET to provide support for creating - user-defined ternary functions that act on Arrays. This code was - originally provided by Navneet Dalal. Also folded in to this file - the macros from the file newet-macros-old.h, using the old - versions only if template template arguments are not supported. I - will be eliminating newet-macros-old.h, since it contained many - macros that were identical to the ones given here. - * blitz/array/asexpr.h: Added definition of struct template - BzTernaryExprResult, which provides the type of the result of a - ternary operator or function acting on three Arrays. - - * blitz/array/expr.h: Added definition of class template - _bz_ArrayExprTernaryOp to handle ternary operators and functions - acting on blitz Arrays. It is modeled after the where operator, - which is a very special kind of ternary operator. 
- -2003-12-15 Julian Cummings - - * blitz/array/functorExpr.h: Added missing typename keyword (using - _bz_typename macro) in a few places where derived types were being - passed to the BZ_PROMOTE macro. - -2003-12-12 Julian Cummings - - * configure.in: Patch from Theodore Papadopoulo to test for flags - needed to set the runtime library search path and to use dynamic - linking. - * acinclude.m4: Patch from Theodore Papadopoulo to provide - functions that test for flags needed to set the runtime library - search path and to use dynamic linking. - * doc/Makefile.am: Patch from Theodore Papadopoulo to fix problem - with building docs from a different directory than the source - directory. - -2003-12-10 Julian Cummings - - * blitz/applics.h blitz/array-impl.h blitz/bench.cc blitz/bench.h - blitz/benchext.cc blitz/benchext.h blitz/blitz.h blitz/bzdebug.h - blitz/compiler.h blitz/etbase.h blitz/extremum.h blitz/funcs.h - blitz/indexexpr.h blitz/limits-hack.h blitz/listinit.h blitz/matbops.h - blitz/matdiag.h blitz/matexpr.h blitz/matgen.h blitz/mathf2.h - blitz/mathfunc.h blitz/matltri.h blitz/matref.h blitz/matrix.cc - blitz/matrix.h blitz/matsymm.h blitz/mattoep.h blitz/matuops.h - blitz/matutri.h blitz/memblock.cc blitz/memblock.h blitz/minmax.h - blitz/mstruct.h blitz/numinquire.h blitz/numtrait.h blitz/ops.h - blitz/prettyprint.h blitz/promote-old.h blitz/promote.h - blitz/rand-dunif.h blitz/rand-normal.h blitz/random.h blitz/randref.h - blitz/range.h blitz/reduce.h blitz/shapecheck.h blitz/tinymat.h - blitz/tinymatexpr.h blitz/tinyvec.cc blitz/tinyvec.h blitz/tinyvecio.cc - blitz/tinyveciter.h blitz/tvcross.h blitz/tvecglobs.h blitz/update.h - blitz/vecaccum.cc blitz/vecall.cc blitz/vecany.cc blitz/vecbfn.cc - blitz/vecbops.cc blitz/veccount.cc blitz/vecdelta.cc blitz/vecdot.cc - blitz/vecexpr.h blitz/vecexprwrap.h blitz/vecio.cc blitz/veciter.h - blitz/vecmax.cc blitz/vecmin.cc blitz/vecnorm.cc blitz/vecnorm1.cc - blitz/vecpick.cc blitz/vecpick.h 
blitz/vecpickio.cc blitz/vecpickiter.h - blitz/vecsum.cc blitz/vector.cc blitz/vector.h blitz/vecuops.cc - blitz/vecwhere.cc blitz/vecwhere.h blitz/zero.cc blitz/zero.h - blitz/array/bops.cc blitz/array/cartesian.h blitz/array/cgsolve.h - blitz/array/complex.cc blitz/array/convolve.cc blitz/array/convolve.h - blitz/array/cycle.cc blitz/array/eval.cc blitz/array/expr.h - blitz/array/fastiter.h blitz/array/funcs.h blitz/array/functorExpr.h - blitz/array/geometry.h blitz/array/indirect.h blitz/array/interlace.cc - blitz/array/io.cc blitz/array/iter.h blitz/array/map.h - blitz/array/methods.cc blitz/array/misc.cc blitz/array/multi.h - blitz/array/newet-macros-old.h blitz/array/ops.cc blitz/array/reduce.cc - blitz/array/reduce.h blitz/array/resize.cc blitz/array/slice.h - blitz/array/slicing.cc blitz/array/stencil-et.h - blitz/array/stencilops.h blitz/array/stencils.cc blitz/array/stencils.h - blitz/array/storage.h blitz/array/uops.cc blitz/array/where.h - blitz/array/zip.h blitz/generate/genmathfunc.cpp - blitz/generate/genpromote.cpp blitz/meta/dot.h blitz/meta/matassign.h - blitz/meta/matmat.h blitz/meta/matvec.h blitz/meta/metaprog.h - blitz/meta/product.h blitz/meta/sum.h blitz/meta/vecassign.h - testsuite/extract.cpp testsuite/loop1.cpp - testsuite/peter-nordlund-2.cpp testsuite/promote.cpp examples/diff.cpp - examples/pauli.cpp examples/random.cpp examples/tiny2.cpp - examples/tiny3.cpp: Patch from Theodore Papadopoulo that removes the - use of some outdated macros such as _bz_bool, _bz_true, _bz_false, and - _bz_explicit, and replaces them directly with the standard C++ - keywords. These keywords should now be implemented in virtually all - C++ compilers, and this change makes the code much more standardized - and readable. This patch also replaces class with typename when - specifying a template argument that is a type. I have extended the - patch to the codes in the testsuite and examples subdirectories. 
- -2003-11-25 Julian Cummings - - * blitz/array/eval.cc: Fixed stack traversal evaluation routines - to properly handle the case of negative strides. Simply change - comparison "i < ubound" to "i != ubound". This fixes a bug - reported quite a while ago regarding the failure of the copy() - method when applied to a reversed Array. - -2003-11-11 Julian Cummings - - * blitz/array/newet-macros-old.h: Removed the superfluous - combination of ETBase and T2. If the first operand in a - binary function or operator is not an Array, ArrayExpr or - IndexPlaceholder, it is assumed to be a POD type. The combination - of T1 and ETBase handles all combinations of a POD type with a - Blitz Array or Array-like type. - * blitz/array/newet-macros.h: Removed the superfluous combination - of ETBase and T2. If the first operand in a binary function - or operator is not an Array, ArrayExpr or IndexPlaceholder, it is - assumed to be a POD type. The combination of T1 and ETBase - handles all combinations of a POD type with a Blitz Array or - Array-like type. Also cleaned up the file formatting. - * blitz/array/Makefile.am: Removed newbops.cc from list of array - header files for distribution. - - * blitz/Makefile.am: Added config-g++3.h to list of extra files to - include in distribution. - -2003-09-26 Patrick Guio - * blitz/applics.h: Added TwoOperandApplicativeTemplatesBase's - _bz_Min and _bz_Max. - * blitz/generate/genvecbfn.cpp: New file. Generate file blitz/vecbfn.cc - with support for C=min(A,B) and C=max(A,B) for TinyVectors. min(A,B) and - max(A,B) return an elementwise min/max of two TinyVectors of same length. - Comparable to STL min() and max() but rquires Blitz to be STL-compliant. - * blitz/generate/genarrbops.cpp blitz/generate/genmatbops.cpp: Generate - support for min() and max() applics. - The min() and max() are elementwise as for TinyVectors. Now available for - Array, Matrix and Vector. 
- * blitz/generate/Makefile.am: Added necessary stuff for the generation of - blitz/vecbfn.cc from blitz/generate/genvecbfn. - * blitz/array/ops.h blitz/array/newbops.cc: Added elementwise min() and - max() functions for the new ET for Arrays. - * blitz/vecbfn.cc blitz/array/bops.cc blitz/matbops.h: - Regenerated with support for elementwise min() and max(). - - -2003-09-17 Julian Cummings - - * compiler/Makefile.am: Separate targets for local config.h file - and the BZ_CONFIG_H file in blitz/config.h. The distribution - contains a default blitz/config.h file, but this should be rebuilt - when the distribution is first unpacked. - * doc/stencils/Makefile.am: Removed texinfo files from EXTRA_DIST - list, since these files are included in the distribution by the - parent directory already. - * doc/examples/Makefile.am: Removed source code files for examples - from EXTRA_DIST, since SOURCES get included in distribution - automatically. Added a few missing files to the EXTRA_DIST list. - * doc/Makefile.am: Added files containing figures for the - documentation to the distribution. - * doc/version.texi: Updated date in version.texi file. - * Makefile.am: Added manual to list of SUBDIRS to be processed by - automake. This is needed to include source files for the Blitz - html manual in the distribution. - * random/Makefile.am: Removed Makefile.am from list of header - files for subdirectory random. This file is part of the - distribution but not part of the installation. - * benchmarks/Makefile.am: Comment out the compile target, so that - it is not included in distdir target. We don't need to compile - benchmark codes in order to create a distribution. - * blitz/Makefile.am: Added file config-VS.NET2003.h to EXTRA_DIST - for inclusion in distribution. - * Makefile.am: Deleted old file config.h.in from EXTRA_DIST list - and added new files README-VS.NET and Blitz-VS.NET.zip for - inclusion in distribution. 
- * configure.in: Switched the optimization flag for the SGI CC - compiler back to -Ofast instead of -O3. This setting includes the - -IPA option, which seems to make a large difference in code - performance. Also, using the -IPA option does not seem to cause - problems when using a static libblitz.a, which is now once again - the default behavior of the configure script. Also, bumped the - version number to 0.7 in anticipation of a new tarball - distribution. - -2003-09-09 Julian Cummings - - * configure.in: Activated automake macro that disables building of - shared libraries by default. Shared libraries support can be - turned on with the --enable-shared option. Changed SGI compiler - default optimization flag from -Ofast to -O3 to avoid issues with - the use of interprocedural analysis (-IPA) during compilation and - linking. Removed some defunct compiler options such as KCCdev and - the old API-specific SGI32 and SGI64 options (use --enable-64bit - instead). Removed linking with SGI libCio.a, which should now be - obsolete. Deleted a few unused autoconf/automake macros. - Corrected instruction message at end of script. - - * blitz/vecexpr.h: Corrected a dependency problem noted by - Matthias Schillinger that was causing some test codes to fail to - compile. This is an example of the problem with having ET support - for the Vector and TinyVector types convolved together. - -2003-09-08 Julian Cummings - - * blitz/rand-mt.h: Replaced use of &S[0] syntax with call to - S.begin() to avoid issues with the return type of - std::vector<>::operator[]. Added call to reload() at the end of - the seed() function. - * blitz/tinyveciter.h: Changed from use of unsigned to int type to - avoid gcc compiler warnings about comparisons between unsigned and - signed integer types. Extended BZPRECONDITION checks to test that - int argument value is non-negative. 
- -2003-09-02 Julian Cummings - - * blitz/array/methods.cc: Fixed bug in code that constructs an - Array from an array expression. The default value for the - ordering and ascending variables when there is no Array with a - defined shape in the expression is INT_MIN, not INT_MAX. - * blitz/vecuops.cc: Freshly generated vecuops.cc header with new - support for conj() function. - * blitz/generate/genvecuops.cpp: Added support for complex math - operations and added conj() to list of generated unary operations - for vector types. - * blitz/vecexpr.h: Added missing unary minus operator for - VectorPick and TinyVector types. - -2003-07-22 Julian Cummings - - * blitz/array/methods.cc: Patch from Derrick Bass to correct a bug - that could lead to incorrect Array ordering data when performing - reductions. - -2003-06-25 Julian Cummings - - * blitz/array/fastiter.h: FastArrayIterator now uses - ConstPointerStack helper class. - * blitz/array/iter.h: Added helper class ConstPointerStack that - manages an array of saved const data pointers used in array - iterators. Moving the array of saved pointers into a separate - class eliminates compiler warnings about potentially unsafe - assignments between restricted and unrestricted pointers in the - same scope. ArrayIterator and ConstArrayIterator now have - ConstPointerStack data members. - * blitz/memblock.cc: Allocate memory block using unrestricted - pointer dataBlocxkAddress_ and then assign to restricted pointer - data_ to eliminate compiler warnings about assignments between - restricted pointers. - * blitz/memblock.h: Removed restrict label from dataBlockAddress_ - pointer, since only the data_ pointer is used in expression - evaluation and needs to be restricted. - * configure.in: Added -qhot to list of C++ optimization flags for - IBM xlC compiler, since it now supports higher-order transforms as - of version 6.0. - -2003-06-16 Patrick Guio - * configure.in: Fixed 2 flags setting for CC/SGI. 
- -2003-05-16 Julian Cummings - - * blitz/array/map.h: Fixed error in ascending() method reported by - Derrick Bass. The name of the corresponding method in the Array - class is isRankStoredAscending(). This method returns a bool, but - it is treated here as an int for the purposes of doing a reduction - across operands. - -2003-05-16 Patrick Guio - * configure.in: Fixed several bugs for CC/SGI setup. Library Cio should be - included as well as -ptused. - -2003-05-09 Patrick Guio - * doc/stencils/Makefile.am: Fixed lib path to be used with libtool stuff - -2003-04-02 Patrick Guio - * blitz/array/resize.cc: Added the definition of the Array class member - function resize(...) with Range argument as it was declared already - present in the Array class declaration. - -2003-03-31 Patrick Guio - * lib/Makefile.am: Enable required libtool library target libblitz.la since - AC_PROG_LIBTOOL is now in use. - -2003-03-28 Theodore Papadopoulo - - * configure.in: Re-enable AC_PROG_LIBTOOL and disable AC_PROG_RANLIB - since now libtool is used. - -2003-03-28 Theodore Papadopoulo - - * doc/examples/Makefile.am: Added $(top_builddir) in include path - for compilation in separate directory. Link against $(top_builddir) - instead of $(top_srcdir) for the same reason. - * doc/stencils/Makefile.am: Ditto. - -2003-03-13 Julian Cummings - - * blitz/range.h: Corrected my silly typo. - * blitz/range.h: Modified Range::isAscendingContiguous() to return - true for the case of a Range that contains just a single element, - since this is a valid Range for constructing an Array. -2003-02-27 Patrick Guio - * blitz/array/Makefile.am: Added newet-macros-old.h in the headers - list to install. - -2003-02-20 Julian Cummings - * blitz/bzdebug.h: Added BZ_DEBUG_PARAM macro to suppress argument - name if it is only used within optional debugging code. This will - allow us to eliminate compiler warnings about unused parameters - that occur when debugging code is disabled. 
Also tidied up - formatting of some of the other macro definitions. - * blitz/array-impl.h: Use BZ_DEBUG_PARAM macro to eliminate - compiler warnings about unused parameters that appear only in - optional debugging code. - * blitz/array/expr.h: Use BZ_DEBUG_PARAM macro to eliminate - compiler warnings about unused parameters that appear only in - optional debugging code. - * blitz/array/stencils.cc: Use BZ_DEBUG_PARAM macro to eliminate - compiler warnings about unused parameters that appear only in - optional debugging code. - * blitz/array/indirect.h: Use BZ_DEBUG_PARAM macro to eliminate - compiler warnings about unused parameters that appear only in - optional debugging code. - * examples/rand2.cpp: Corrected mismatch between printf format - string and argument type. - * examples/tiny2.cpp: Include tinyvec-et.h header rather than - tinyvec.h to get ET support. - * examples/curldiv.cpp: Made some repairs to this example code so - that it properly uses stencil ops. - -2003-02-14 Julian Cummings - * testsuite/matthias-troyer-2.cpp: Uncommented line which applies - the user-defined stencil. With the recent fixes to - blitz/array/stencilops.h, this code now works as expected. - * blitz/array/stencilops.h: Changed A to (*A) as needed in stencil - op definitions. This is necessary because A is an iterator type, - and the conversion operator is not automatically called when - T_numtype is not a built-in type. Also made stencil op - definitions a bit more readable and fixed a typo in backward42 - multicomponent stencil. Factorized the central difference - expressions. - * blitz/array/stencils.cc: Removed names of unused parameters in - calcStencilExtent to eliminate gcc compiler warnings. - * blitz/array/stencil-et.h: Removed name of unused parameter to - eliminate gcc compiler warning. - -2003-02-07 Julian Cummings - * blitz/indexexpr.h: Eliminated gcc compiler warnings about - explicitly initializing base class ETBase in IndexPlaceholder copy - constructor. 
- * blitz/array/expr.h: Removed names of some unused parameters in - _bz_ArrayExprConstant member functions to eliminate gcc compiler - warnings. Eliminated gcc compiler warnings about explicitly - initializing base class ETBase in _bz_ArrayExpr copy constructor. - * blitz/array/reduce.h: Removed names of some unused parameters in - _bz_ArrayExprReduce member functions to eliminate warnings from - gcc compiler. - * blitz/array/map.h: Removed names of unused parameters in some - ArrayIndexMapping member functions to eliminate warnings from gcc. - * blitz/array-impl.h: Removed names of unused parameters in - slice() function to eliminate compiler warnings from gcc. - Eliminated gcc compiler warnings about explicitly initializing - base classes MemoryBlockReference and ETBase in Array copy - constructor. - * blitz/array/slicing.cc: Removed name of an unused parameter in - slice() function to eliminate a warning from gcc compiler. - * blitz/array/iter.h: Removed name of unused Array parameter when - constructing a ConstArrayIterator with a _bz_endTag specifier. - Eliminates a warning from gcc. - * blitz/benchext.cc: Changed type of some loop variables from int - to unsigned to avoid comparisons between int and unsigned - expressions. Eliminated some unnecessary comparisons of unsigned - quantities with zero. - * blitz/benchext.h: Changed type of data members numParameters_ - and parameterNumber_ from int to unsigned, since these can never - be negative. - * blitz/array/stencils.cc: Removed names of unused parameters in - function getStencilExtent() to eliminate gcc warnings. - * blitz/meta/matassign.h: Removed names of some unused parameters - in fully specialized versions of _bz_meta_matAssign::f() to - eliminate gcc warnings. - * blitz/blitz.h: Added "mutable" keyword (if available) to macro - definition for declaring a mutex object. 
Need to make mutex - mutable when it is a member of a class with const member functions - that use the mutex, as is the case with the MemoryBlock class when - using threads. This addresses a problem noted by Patrik Jonsson - . - -2003-02-05 Patrick Guio - * depcomp install-sh mkinstalldirs missing: Removed from repository - since generated running `autoreconf -v -i'. - * Makefile.am: Added a guard to avoid overwriting COPYING and INSTALL - files when running `autoreconf -v -i -f'. - * benchmarks/Makefile.am doc/Makefile.am doc/examples/Makefile.am - doc/stencils/Makefile.am examples/Makefile.am testsuite/Makefile.am: - Do not install anything from these directories except blit.info into - $prefix/info and blitz.ps into $prefix/doc. - * config.guess config.sub: Removed from repository. Added automatically - when running `autoreconf -v -i' for recent versions of autoconf or copied - manually from the `share' directory of automake for earlier version of - autoconf. - -2003-01-23 Patrick Guio - * blitz/Makefile.am blitz/generate/Makefile.am: Added dependencies - and building rule for the generated .cc and .h files. - -2003-01-22 Julian Cummings - - * blitz/array/asexpr.h: Placed a guard around new - BzUnaryExprResult and BzBinaryExprResult code so it is only used - if compiler supports templates as template arguments. - -2003-01-22 Patrick Guio - * doc/stencils/*.texi: Removed from repository since generated by `make'. - * doc/examples/*.{texi,out}: Likewise. - * doc/stencils/Makefile.am: Added *.texi in clean-local target - * doc/examples/Makefile.am: Added *.texi and *.out in clean-local target. - Prefix ./ to $< in .out building rule. - * configure.in: Removed unecessary AC_PROG_MAKE_SET. 
- * blitz/generate/genmathfunc.cpp: Fixed the generation of a few functions: - the `isnan' function into `blitz_isnan' to avoid trouble with the macro - definition of `isnan' on some platforms like Compaq/cxx and HP/aCC, - added prettyPrint template member function for class template _bz_negate, - added condition BZ_HAVE_COMPLEX_MATH for the specialization of _bz_sqr for - complex. - * blitz/generate/genmatuops.cpp: Undo blitz_isnan to isnan. - * blitz/generate/genvecuops.cpp: Undo blitz_isnan to isnan. - * blitz/generate/genmathfunc.cpp: Removed implicit conversion from string - literal to char* in `one' and `two' functions by `const' the function - prototypes. - -2003-01-21 Julian Cummings - * blitz/generate/genmathfunc.cpp: Fixed categorization and scoping - problems with fmod, ilogb and isnan. - * blitz/generate/genvecuops.cpp: Renamed blitz_isnan to isnan. - * blitz/generate/genmatuops.cpp: Renamed blitz_isnan to isnan. - * Makefile.am: Commented out references to demos subdirectory. - * doc/stencils/Makefile.am: Prefix ./ to dump-stencils command. - * blitz/generate/genarrbops.cpp: Generate bops.cc header file in - blitz/array subdirectory. - * blitz/generate/genmatbops.cpp: Generate matbops.h header file in - parent directory. - * blitz/generate/genmathfunc.cpp: Generate mathfunc.h header file - in parent directory. - * blitz/generate/genvecbops.cpp: Generate vecbops.cc header file - in parent directory. - * blitz/generate/genvecwhere.cpp: Generate vecwhere.cc header file - in parent directory. - * blitz/generate/genarruops.cpp: Generate uops.cc in blitz/array - subdirectory. - * blitz/generate/genmatuops.cpp: Generate matuops.h in parent - directory. Added missing implementation of function two(). - * blitz/generate/genvecuops.cpp: Generate vecuops.cc in parent - directory. Added missing implementation of function two(). - * blitz/generate/genpromote.cpp: Generate promote-old.h header in - parent directory. Fixed return code. 
- * blitz/generate/Makefile.am: Added generate-headers target to run - programs in blitz/generate subdirectory. - * blitz/array/Makefile.am: Added genheaders target to handle blitz - headers that are generated in blitz/generate subdirectory. - * blitz/Makefile.am: Added genheaders target to handle blitz - headers that are generated in blitz/generate subdirectory. - * blitz/Makefile.am: Reorder subdirectories so that necessary - header files are generated prior to building in array - subdirectory. - -2003-01-21 Patrick Guio - * doc/stencils/Makefile.am: Removed an important comment. - * doc/stencils/Makefile.in: Regenerated by "autoreconf -v" (autoconf 2.53 - and automake 1.6.2). - * doc/examples/.cvsignore: Added programs. - * doc/doxygen/.cvsignore: New file. - * doc/examples/range.cpp: Fixed a bug in the initialisation of array A. - * doc/examples/range.texi: Regenerated. - * doc/blitz.info: Regenerated. - -2003-01-21 Patrick Guio - * doc: New directory that should contain any form of documentation; Currently - the Texinfo documentation files were ported from the original documentation - written for the `yodl' documentation program. - * doc/blitz.info: New file. Generated by makeinfo when running make. Added - to avoid problem is makeinfo not present. - * doc/about.texi doc/arrays-stencils.texi doc/install.texi - doc/arrays-ctors.texi doc/arrays-storage.texi doc/legal.texi - doc/arrays-debug.texi doc/arrays-types.texi doc/numinquire.texi - doc/arrays-expr.texi doc/arrays-usertype.texi doc/parallel.texi - doc/arrays-globals.texi doc/blitz.texi doc/platforms.texi - doc/arrays-indirect.texi doc/compiling.texi doc/random.texi - doc/arrays-intro.texi doc/constants.texi doc/tau.texi doc/arrays-io.texi - doc/copyright.texi doc/tinymatrix.texi doc/arrays-members.texi - doc/download.texi doc/tinyvector.texi doc/arrays-multi.texi doc/faq.texi - doc/tuning.texi doc/arrays-slicing.texi doc/help.texi doc/version.texi: - New files. the Texinfo documentation files. 
- * doc/blitz.gif doc/sinsoid.eps doc/slice.txt doc/tensor1.gif - doc/blitztiny.jpg doc/sinsoid.gif doc/strideslice.eps doc/tensor1.txt - doc/indirect.eps doc/sinsoid.txt doc/strideslice.gif doc/indirect.gif - doc/slice.eps doc/strideslice.txt doc/indirect.txt doc/slice.gif - doc/tensor1.eps doc/indirect.fig doc/slice.fig doc/strideslice.fig - doc/tensor1.fig: New files. Pictures in different format for the - documentation. - * doc/texinfo.tex: New file. Texinfo definition file. - * doc/Makefile.am: New file. - * doc/makedatestring doc/mdate-sh doc/Makefile.in: New files. Generated by - "autoreconf -v" (autoconf 2.53 and automake 1.6.2). - * doc/stamp-vti: New file. Date and version of the doc. Generated by - running `make stamp-vti'. - * doc/.cvsignore: New file. - * doc/stencils: New directory. Contains stencil Texinfo documentation. - * doc/stencils/backward11.texi doc/stencils/central34.texi - doc/stencils/backward12.texi doc/stencils/central42.texi - doc/stencils/backward21.texi doc/stencils/central44.texi - doc/stencils/backward22.texi doc/stencils/forward11.texi - doc/stencils/backward31.texi doc/stencils/forward12.texi - doc/stencils/backward32.texi doc/stencils/forward21.texi - doc/stencils/backward41.texi doc/stencils/forward22.texi - doc/stencils/backward42.texi doc/stencils/forward31.texi - doc/stencils/central12.texi doc/stencils/forward32.texi - doc/stencils/central14.texi doc/stencils/forward41.texi - doc/stencils/central22.texi doc/stencils/forward42.texi - doc/stencils/central24.texi doc/stencils/Laplacian2D4.texi - doc/stencils/central32.texi doc/stencils/Laplacian2D.texi: New files. Can - be generated by making target `stencils'. - * doc/stencils/dump-stencil.cpp: New file. To generate the stencils - documentation files. - * doc/stencils/Makefile.am: New file. - * doc/stencils/Makefile.in: New file. Generated by "autoreconf -v" - (autoconf 2.53 and automake 1.6.2). - * doc/stencils/.cvsignore: New file. - * doc/examples: New directory. 
Contains codes and output that are included - in the documentation. - * doc/examples/cast.cpp doc/examples/output.cpp doc/examples/debug.cpp - doc/examples/range.cpp doc/examples/dump.cpp doc/examples/simple.cpp - doc/examples/fixed-class.cpp doc/examples/slicing.cpp - doc/examples/fixed.cpp doc/examples/storage.cpp doc/examples/io.cpp - doc/examples/strideslice.cpp doc/examples/outer.cpp doc/examples/xor.cpp: - New files. Code samples to be used to include in the documentation. - * doc/examples/cast.texi doc/examples/output.texi - doc/examples/debug.texi doc/examples/range.texi - doc/examples/dump.texi doc/examples/simple.texi - doc/examples/fixed-point.texi doc/examples/slicing.texi - doc/examples/fixed.texi doc/examples/storage.texi - doc/examples/io.texi doc/examples/strideslice.texi - doc/examples/outer.texi doc/examples/xor.texi: New files. Files to be - included in the documentation. Can be generated from the .cpp files by - making target `texi'. - * doc/examples/makefile.example: New file. Makefile sample to be included - in the documentation. - * doc/examples/cast.out doc/examples/outer.out - doc/examples/storage.out doc/examples/debug.out doc/examples/output.out - doc/examples/strideslice.out doc/examples/dump.out - doc/examples/range.out doc/examples/xor.out doc/examples/fixed.out - doc/examples/simple.out doc/examples/io.out doc/examples/slicing.out: New - files. Sample output to be included in the documentation. Can be generated - by making target `out'. - * doc/examples/fixed-point.h doc/examples/io.data: New files. Needed to - generate the documentation. - * doc/examples/Makefile.am: New file. - * doc/examples/Makefile.in: New file. Generated by "autoreconf -v" - (autoconf 2.53 and automake 1.6.2). - * doc/examples/.cvsignore: New file. - * doc/doxygen: New directory for doxygen documentation. - * doc/doxygen/blitz.doxygen: New file. Configuration file needed by - doxygen to generate html and latex documentations. 
The command is then - `doxygen blitz.doxygen'. - * configure.in: Added creation of Makefiles in doc directory in - AC_CONFIG_FILES. Updated version of Blitz in AM_INIT_AUTOMAKE. - * Makefile.am: Removed `demos' and `manual' and added `doc' in SUBDIRS. - * configure: Regenerated by `autoreconf -v' - (autoconf 2.53 and automake 1.6.2). - * Makefile.in: Likewise. - -2003-01-14 Patrick Guio - * blitz/applics.h blitz/array-impl.h blitz/array-old.h blitz/array.h - blitz/bench.[cc,h] blitz/benchext.[cc,h] blitz/blitz.h blitz/bzdebug.h - blitz/compiler.h blitz/etbase.h blitz/extremum.h blitz/funcs.h - blitz/indexexpr.h blitz/limits-hack.h blitz/listinit.h blitz/matdiag.h - blitz/matexpr.h blitz/matgen.h blitz/mathf2.h blitz/matltri.h - blitz/matref.h blitz/matrix.[cc,h] blitz/matsymm.h blitz/mattoep.h - blitz/matutri.h blitz/memblock.[cc,h] blitz/minmax.h blitz/mstruct.h - blitz/numinquire.h blitz/numtrait.h blitz/ops.h blitz/prettyprint.h - blitz/promote.h blitz/rand-dunif.h blitz/rand-normal.h blitz/rand-tt800.h - blitz/rand-uniform.h blitz/random.h blitz/randref.h blitz/range.h - blitz/reduce.h blitz/shapecheck.h blitz/tau.h blitz/timer.h blitz/tiny.h - blitz/tinymat.h blitz/tinymatexpr.h blitz/tinymatio.cc blitz/tinyvec-et.h - blitz/tinyvecio.cc blitz/tinyveciter.h blitz/traversal.cc traversal.h - blitz/tuning.h blitz/tvcross.h blitz/tvecglobs.h blitz/update.h - blitz/vecaccum.cc blitz/vecall.cc blitz/vecany.cc vecbops.cc - blitz/veccount.cc blitz/vecdelta.cc blitz/vecdot.cc blitz/vecexpr.h - blitz/vecexprwrap.h blitz/vecglobs.[cc,h] blitz/vecio.cc blitz/veciter.h - blitz/vecmax.cc blitz/vecmin.cc blitz/vecnorm.cc blitz/vecnorm1.cc - blitz/vecpick.[cc,h] blitz/vecpickio.cc blitz/vecpickiter.h - blitz/vecsum.cc blitz/vector-et.h blitz/vector.[cc,h] blitz/vecuops.cc - blitz/vecwhere.[cc,h] blitz/zero.[cc,h]: Removed the cvs log and added - them to the ChangeLog - -2003-01-13 Patrick Guio - * benchmarks/acou3df2.f benchmarks/acou3df90.f90 - benchmarks/acou3df902.f90: 
Removed the cvs log and added them to the - ChangeLog - * random/mt.h: Likewise - * manual/examples/storage.cpp: Likewise - * manual/blitz02.html: Likewise - * compiler/bzconfig: Likewise - * examples/array.cpp examples/cast.cpp examples/deriv.cpp examples/erf.cpp - examples/fixed.cpp examples/matmult.cpp examples/numinquire.cpp - examples/outer.cpp examples/pick.cpp examples/qcd.cpp examples/rangexpr.cpp - examples/reduce.cpp examples/simple.cpp examples/slicing.cpp - examples/storage.cpp examples/tiny.cpp examples/where.cpp: Likewise - * blitz/meta/dot.h blitz/meta/matassign.h blitz/meta/matmat.h - blitz/meta/matvec.h blitz/meta/metaprog.h blitz/meta/product.h - blitz/meta/sum.h blitz/meta/vecassign.h: Likewise - * blitz/generate/bzfstream.h blitz/generate/genarrbops.cpp - * blitz/generate/genmatbops.cpp blitz/generate/genmatuops.cpp - * blitz/generate/genvecbops.cpp: Likewise - * blitz/tinyvec.[cc,h]: Likewise - * blitz/tinyvec.[cc,h]: Added a templated constructor for TinyVector that - construct from another TinyVector with the same length but a different - element type - -2003-01-10 Theodore Papadopoulo - * blitz/array/*.h: Removed the cvs log and added them to the ChangeLog. - -2003-01-08 Theodore Papadopoulo - * configure.in: Removed demos/Makefile from the generated files since this - directory is empty for now. - * blitz/array/asexpr.h: Introduce BzUnaryExprResult and BzBinaryExprResult - to describe the type of an expression. - * blitz/array/newet-macros.h: Use it here. Add a cast in - BZ_DECLARE_FUNCTION2 to avoid a warning. - -2003-01-07 Patrick Guio - * Makefile.in benchmarks/Makefile.in bin/Makefile.in - blitz/array/Makefile.in blitz/generate/Makefile.in blitz/meta/Makefile.in - compiler/Makefile.in examples/Makefile.in lib/Makefile.in - manual/examples/Makefile.in manual/stencils/Makefile.in random/Makefile.in - src/Makefile.in testsuite/Makefile.in: Regenerated with "autoreconf -v" - (autoconf 2.53 and automake 1.6.2). - * configure.in: Likewise. 
- * blitz/generate/arroperands.h blitz/generate/genmathfunc.cpp: Added - #include (as in blitz/generate/operands.h and - blitz/generate/operands2.h) that was needed for strcmp() and strlen() - functions call. - * blitz/generate/.cvsignore: Added .deps and gen* progs - * manual/examples/.cvsignore manual/stencils/.cvsignore: Added .deps - * benchmarks/Makefile.am lib/Makefile.am blitz/generate/Makefile.am: - the clean-local target should remove recursively template directories like - cxx_repository and ti_files. - * benchmarks/Makefile.in lib/Makefile.in blitz/generate/Makefile.in: - Regenerated with "autoreconf -v" (autoconf 2.53 and automake 1.6.2). - * compiler/.cvsignore benchmarks/.cvsignore blitz/generate/.cvsignore - lib/.cvsignore: Added template directory cxx_repository. - -2003-01-06 Theodore Papadopoulo - * configure.in: Do not substitute the variable SUBDIRS. Created - a new old-gcc compiler option. Updated the gcc compiler option - for modern gcc (at least >=3.1). - * All Makefiles.am: Remove SUBDIRS where appropriate. - * testsuite/Makefile.am, examples/Makefile.am: Use top_builddir - instead of top_srcdir to allow building in a separate directory. - * blitz/generate/Makefile.am: Partial rewrite to allow the build - of the generation programs. - -2003-01-06 Theodore Papadopoulo - * testsuite/reduce.cpp: Bug correction. - -2003-01-06 Theodore Papadopoulo - * blitz/generate/*.[h,cpp]: Update to ISO C++. - * blitz/compiler/tempkey.cpp: Update to ISO C++ (and g++-3.4). - * blitz/array-impl.h: Partial update to ISO C++ syntax (needed for g++-3.4). - -2002-12-21 Patrick Guio - * benchmarks/daxpy.cpp: prefer new C++ cast to old C style cast - -2002-12-19 Julian Cummings - * blitz/meta/vecassign.h: Removed names of function arguments in - the static methods of the _bz_meta_vecAssign<0,0> specialization - to eliminate compiler warnings about unused variables. - * blitz/array/eval.cc: Several changes to eliminate compiler - warnings about unused variables. 
- * testsuite/extract.cpp: Uncommented a couple of additional tests. - * testsuite/free.cpp: Removed an unused variable. - * blitz/range.h: Removed name of an unused function argument. - * testsuite/module1.cpp: Removed an unused variable. - -2002-12-17 Patrick Guio - * benchmarks/acousticf2.f: fixed in commented test prog call to - echo_f77Tuned instead of echo_f77, duplicate echo_f77_set into echo_f77_set2 - to allow independent compilation of test prog - * benchmarks/acou3df.f: Added support for test prog - * benchmarks/acou3df2.f: Added support for test prog, added explicit INTEGER - declaration for i,j,k in subroutine acoustic3d_f77Tuned_stencil - * benchmarks/acou3d.cpp: number of operations in variable Mflops corrected - from 9 (in acoustic.cpp) to 11 - * benchmarks/ctime3v.cpp benchmarks/ctime4v.cpp benchmarks/ctime5v.cpp: Added - necessary overloaded math functions for float type argument for gcc 2.95.3 - * benchmarks/ctime5.cpp: replaced in macro BZ_USING_NAMESPACE - -2002-12-16 Patrick Guio - * configure.in: Added as suggested by Zane Dodson - the AM_MAINTAINER_MODE automake macro to avoid generating the rules to - rebuild these out-of-date maintainer files by default. To rebuild them run - configure with the option --enable-maintainer-mode - * aclocal.m4: Regenerated by autoreconf 2.53 - * configure: Regenerated by autoreconf 2.53 - * Makefile.in benchmarks/Makefile.in bin/Makefile.in blitz/Makefile.in - blitz/array/Makefile.in blitz/generate/Makefile.in blitz/meta/Makefile.in - compiler/Makefile.in examples/Makefile.in lib/Makefile.in - manual/Makefile.in manual/examples/Makefile.in manual/stencils/Makefile.in - random/Makefile.in src/Makefile.in testsuite/Makefile.in: Regenerated by - autoreconf 2.53 - -2002-12-12 Patrick Guio - * aclocal.m4: updated using the autoconf tool autoreconf 2.53 that is - dedicated to update generated configuration files - -2002-12-10 Patrick Guio - * benchmarks/loops.data: New file.
Input file for benchmarks/makeloop - * configure.in: Added flag -fno-second-underscore for GNU g77 and - fixed CXXFFLAGS for that compiler. - When using g77 and linux assume no Fortran 90 compiler is available. - Added AM_CONDITIONAL call to allow benchmarking without Fortran 90 - compiler available. - Fixed AC_CHECK_LIB(blas, ...), added extra arguments to take into account - necessary extra libs (needed for linux) - * benchmarks/Makefile.am: Added conditional compilation code using - automake conditional flag generated by AM_CONDITIONAL() in configure.in - * configure: Rebuilt with autoconf 2.53 - * Makefile.in benchmarks/Makefile.in bin/Makefile.in blitz/Makefile.in - blitz/array/Makefile.in blitz/generate/Makefile.in blitz/meta/Makefile.in - compiler/Makefile.in examples/Makefile.in lib/Makefile.in - manual/Makefile.in manual/examples/Makefile.in manual/stencils/Makefile.in - random/Makefile.in src/Makefile.in testsuite/Makefile.in: Rebuilt with - automake 1.6.2 - * benchmarks/makeloops.cpp: Modified to allow the generation of loops - kernel benchmark without Fortran 90 compiler available. Do not generate - makefile.inc. Generate Fortran 90 routines with .f90 extension - * benchmarks/loop?.cpp: Regenerated from makeloop program and loops.data - * benchmarks/.cvsignore: Added qcd.m and makeloops - * benchmarks/daxpy.cpp: bench conditionally valarray and f90 routines if - C++ header and Fortran 90 are available respectively - * benchmarks/qcd.cpp: use /#if/#elif/#endif/ syntax for FORTRAN_SYMBOLS - check - * benchmarks/stencil.cpp: bench conditionally f90 routines if Fortran 90 - is available - * benchmarks/haney.cpp: bench conditionally valarray and f90 routines if - C++ header and Fortran 90 are available respectively. Conditional - compilation added to avoid a crash with GNU g++ - * benchmarks/acoustic.cpp: cosmetic changes.
int cast added where necessary - to avoid warnings from g++ - * benchmarks/acou3df.f: Added declaration of k as INTEGER - * benchmarks/acou3d.cpp: bench conditionally f90 routines if Fortran 90 is - available. Reorganised in a way similar to benchmarks/acoustic.cpp - * benchmarks/benchext.cc: Removed Blitz assert that could not be fulfilled - in template BenchmarkExt::setNumParameters(int numParameters) - -2002-11-26 Julian Cummings - - * benchmarks/qcdf.f: Corrected declaration of double precision - complex arrays in subroutine qcdf. Use type complex*16 and make - site number the final (slowest varying) dimension. - -Fri Oct 25 10:56:21 CEST 2002 Patrick Guio - * configure.in: fixed Fortran compiler message for linux - * configure: Rebuilt with autoconf 2.53 - -2002-10-17 Julian Cummings - * benchmarks/acou3df2.f: Removed some extraneous semicolons that were - inhibiting compilation of these Fortran sources with some compilers. - -2002-10-09 Patrick Guio - * src/Makefile.am: Moved library target libblitz.a into lib directory - to avoid unnecessary and not so clean target like making a copy into lib - directory - * src/Makefile.in: Rebuilt with automake 1.6.2 - * lib/Makefile.am: Added library target libblitz.a - * lib/Makefile.in: Rebuilt with automake 1.6.2 - * Makefile.am: Modified src targets into lib where necessary - * Makefile.in: Rebuilt with automake 1.6.2 - -2002-09-19 Julian Cummings - * testsuite/Adnene-Ben-Abdallah-2.cpp: Skip part of this test when - using IBM xlC compiler, since it can't handle it. - * testsuite/extract.cpp: Added workaround for IBM xlC compiler - bug. - * compiler/ieeemath.cpp: Commented out unused variable. Added - test to skip over testing of lgamma() function if building on AIX - platform in threaded mode. This test causes a failure in this - case because the lgamma function has a slightly different - interface in thread-safe mode on AIX.
- * src/Makefile.am: Added explicit command for building libblitz.a - that utilizes $AR_FLAGS. - * src/Makefile.in: Regenerated from Makefile.am using automake. - * configure.in: Cleaned up some of the settings of compiler flags - and added use of the C++ compiler as the archiver for KCC and SGI - CC. Added new --enable-64bit option to trigger 64-bit - configuration and builds. This currently works for the SGI and - IBM platforms with the native compilers or with KCC or gcc. Note: - I have left the old SGI64 and SGI32 compiler options in for now, - but there is also a new plain old SGI option that will build with - the default ABI. Using the --enable-64bit option should then - force 64-bit compiles. - * configure: Regenerated from configure.in using autoconf. - * blitz/array/fastiter.h: Removed meaningless restrict qualifier - inside const_cast that generates compiler warnings under SGI and - KCC compilers. - -2002-09-05 Julian Cummings - * configure: Regenerated from configure.in using autoconf. - * configure.in: Added flag -qrtti=all when using IBM's xlC - compiler to enable RTTI support. - -2002-08-30 Julian Cummings - * testsuite/storage.cpp: Modified construction of one Array to - test new setStorage() method. - * blitz/array/methods.cc: Added definition of setStorage() method, - which lets user set Array storage format after construction. We - check that Array is not allocated first. - * blitz/array-impl.h: Added declaration of setStorage() method, - which lets user set Array storage format after construction. - * blitz/array/storage.h: Added explicit assignment operator for - GeneralArrayStorage class. - -2002-07-24 Patrick Guio - * configure.in: Removed occurences of -DBZ_DISABLE_XOPEN_SOURCE, this is - the default and changed to -DBZ_ENABLE_XOPEN_SOURCE for DECcxx6.2 and - DECcxx6.3 required for scoping of ilogb. 
- * configure: Rebuilt from configure.in using autoconf 2.53 - * all Makefile.in: Rebuilt from Makefile.am using automake 1.6.2 - -2002-07-23 Julian Cummings - * blitz/array/functorExpr.h: Changed _bz_FunctorExpr classes to - store user-defined functor by value rather than by const - reference. This fixes a problem under certain compilers with the - state of the user-defined functor being corrupted prior to - evaluation of the Array expression. Also, cleaned up remnants - from previous use of Blitz minmax function. - * blitz/array/expr.h: Added a four-argument templated constructor - for _bz_ArrayExpr, which is needed when building an Array - expression containing a functor that takes three arguments. This - is needed to support functorExpr.h, which allows functors with up - to three arguments. - * testsuite/newet.cpp: Changed expressions used in final test to - ensure that two-argument pow() function is called with first - argument being non-negative. Otherwise, the results of the - function call are undefined. - -2002-07-19 Julian Cummings - * blitz/blitz.h: Put ending semicolon into definition of - BZ_MUTEX_* macros so that you don't need to add a semicolon after - invoking the macro. - * blitz/memblock.h: Removed ending semicolon after invocations of - BZ_MUTEX_* macros. This is now handled within the definition of - these macros. This should get rid of compiler warnings from SGI - CC and others about extra semicolons being ignored, which happened - when these macros were defined as blank. - -2002-07-19 Patrick Guio - * blitz/mathfunc.h: Added missing template member function prettyPrint - in template class _bz_negate (HP aCC complained during library - compilation). - * blitz/array/functorExpr.h: minmax::max(a,b) replaced by (a>b?a:b) due to - namespacing trouble with HP/aCC - -2002-07-17 Julian Cummings - * blitz/memblock.h: Added missing semicolon after use of - BZ_MUTEX_DECLARE macro. 
- -2002-07-16 Julian Cummings - * Makefile.am: Added line to specify subdirectories and insure - that compiler subdirectory is visited before blitz subdirectory. - This is necessary because the compiler subdirectory generates the - config.h header file needed in blitz subdirectory. - * Makefile.in: Rebuilt from Makefile.am. - * configure: Rebuilt from configure.in. Includes changes to IBM - xlf90 flags. - * aclocal.m4: Rebuilt using aclocal 1.6.2. - * blitz/Makefile.am: Added config.h to blitz_HEADERS list and - removed all the compiler-specific config files. These - compiler-specific files should no longer be needed because we - normally generate the config.h file automatically. Moved the - compiler-specific headers to the EXTRA_DIST list so they will - still be included in the distribution tarball. - * blitz/Makefile.in: Rebuilt from Makefile.am. - * blitz/array/funcs.h: Removed ET support for Array expressions - involving ldexp(), jn() and yn() functions. These functions - require specialized macros that allow one of the function - arguments to be an ordinary int. Such macros have not yet been - added to . - -2002-07-02 Julian Cummings - * examples/cast.cpp: Added check for use of new-style expression - templates. I changed the syntax for casting the type of a blitz - Array to make it simpler. Instead of giving a second argument - that is an instance of the new element type, just provide the new - element type as a template parameter. - * examples/array.cpp examples/deriv.cpp examples/erf.cpp examples/fixed.cpp - examples/matmult.cpp examples/outer.cpp examples/pick.cpp examples/qcd.cpp - examples/rangexpr.cpp examples/reduce.cpp examples/simple.cpp - examples/slicing.cpp examples/storage.cpp examples/tiny.cpp - examples/where.cpp: Updated to use new header file names that avoid - capitalization. 
- * testsuite/newet.cpp: Added testing of functorExpr.h header file, - which provides support for declaring user-defined functors and - class methods that act on Array types. The code for this support - originated from Derrick Bass of Caltech. - * testsuite/tinyvec.cpp: Include blitz/tinyvec-et.h here to get ET - support. - * blitz/vecexpr.h: Undid the previous change to this file. Vector - ET support is now gotten by including blitz/vector-et.h - explicitly. - * blitz/promote.h: Added BZ_BLITZ_SCOPE to promote_trait in - BZ_PROMOTE macro definition so that this macro works correctly - outside the blitz namespace. - * blitz/Makefile.am: Removed Array.h, Vector.h, TinyVec.h and - TinyMat.h from blitz_HEADERS. Added array-old.h, vector-et.h - tinyvec-et.h and funcs.h. - * blitz/Makefile.in: Rebuilt from Makefile.am to update - blitz_HEADERS. - * blitz/tinyvec-et.h: This is the new name for the TinyVec.h file, which - includes the TinyVector class implementation and ET support using the old - style expression templates. - * blitz/array.h: This file used to be called Array.h but has been - renamed to avoid name clashes on Windows-based systems. This file - includes only the Array class implementation files. - * blitz/array-old.h: This is the new name for the old array.h header file - that included all of the Array, TinyVector and Vector stuff along with - Vector ET support. It is now deprecated. - * blitz/array/newet-macros.h: Renamed and reorganized new style - macros for declaring unary and binary functions/operators that act - on Array types. - * blitz/array/newet.h: Added #include of blitz/array/functorExpr.h - to support user-defined functors and class methods acting on Array - types. - * blitz/array/funcs.h: Use new style of Array ET macros to declare - unary and binary math functions that act on Array types. - * blitz/array/ops.h: Use new style of Array ET macros to create - unary and binary operators that act on Array types. 
- * blitz/ops.h: Rewrote and reorganized this file to make better - use of macros to generate all the functor classes needed to - provide unary and binary operators for the "new" style of - expression templates. - * blitz/array/Makefile.am: Added functorExpr.h to list of - array_HEADERS. - * blitz/array/Makefile.in: Rebuilt from Makefile.am to include new - header file. - -2002-06-28 Julian Cummings - * blitz/mathfunc.h: Corrected categorization of fmod and ilogb - math functions. - * blitz/vecuops.cc: Freshly generated from genvecuops.cpp. - Changed BZ_HAVE_SYSV_MATH to BZ_HAVE_SYSTEM_V_MATH to match what - is in config.h and elsewhere. Corrected categorization of a few - math functions. Otherwise, no changes. - * blitz/matuops.h: Freshly generated matuops.h header file with - definitions for unary math functions operating on Matrix types. - This file was previously empty. I followed a style similar to - that of vecuops. - * blitz/matbops.h: Rebuilt from latest genmatbops.cpp. Same - content as before with slightly different formatting. Also, - binary combinations with complex are now handled with a single - templated ComplexOperand type as with vecbops. - * blitz/generate: Added support for generating matuops.h header - file borrowing heavily from the existing genvecuops code. Also - made a few corrections in categorization of math functions. - * configure.in: Added -qstrict flag to F77 and F90 optimization - flags on AIX platforms to avoid optimizations that might alter the - code semantics, as suggested by the compiler. Added flag for - xlf90 compiler to allow f90 file suffix. - * config.guess: Undo previous change to this script regarding use - of oslevel script on AIX. This was a mistake based on an error in - my environment variable settings. - -2002-06-27 Julian Cummings - * compiler/sysvmath.cpp: Removed ilogb() and fmod() from this - test. ilogb() is now in the IEEE math test and fmod() should be - in the standard C math library. 
- * compiler/ieeemath.cpp: Added tests for cbrt() and ilogb() here. - * compiler/getruse.cpp: Added code to check if getrusage() is - available. - * blitz/benchext.cc: Changed loop variable j to unsigned to - eliminate signed/unsigned comparisons. - * testsuite/complex-test.cpp: Added some explicit casts to float - to eliminate precision loss warnings from implicit conversions of - double to float. Changed abs() to fabs() for floating-point - arguments. abs() is for ints or complex numbers, while fabs() is - for floating-point numbers. - * testsuite/chris-jeffery-1.cpp: Slight change to eliminate - precision loss warning from double/float conversion. - * blitz/array/storage.h: Use _bz_bool and _bz_true where - appropriate to avoid int/bool conversions. - * blitz/array/fastiter.h: Changed order of ctor initializers to - match order of member data declarations, eliminating warning from - gcc compiler. - * blitz/array/slicing.cc: Modified valid indexing check to avoid - casting to unsigned. - * blitz/shapecheck.h: Changed loop variable i to unsigned to avoid - unsigned/signed comparisons. - * blitz/array-impl.h: Modified the isInRange() methods to check - that each index value is greater than or equal to the base index - value, rather than just casting (index-base) to an unsigned int. - The latter gives unpredictable results if index - * configure: Rebuilt from configure.in. - * configure.in: Moved package and version arguments back to - AM_INIT_AUTOMAKE macro call. The new style suggested in the - automake documentation doesn't seem to work right when the tarball - name contains uppercase letters and is not trivially derived from - the package name. - * configure.in: Added section to handle xlC as a valid C++ - compiler option. Moved package name and version number arguments - to AC_INIT macro. Moved AC_CANONICAL_TARGET macro call higher up - in file to eliminate autoconf warning. 
- * config.guess: Commented out use of oslevel command on AIX - platform to get OS version and release info, since it does not - always seem to report the right thing and can send confusing - output to config.sub. - * examples/Makefile.in: Rebuilt from examples/Makefile.am. - * examples/Makefile.am: Added profile.cpp, tiny2.cpp and tiny3.cpp - to EXTRA_DIST, so that these files will be included in the - distribution tarball. This is necessary because these files are - not include in SOURCES and are not normally compiled. - * testsuite/extract.cpp: Changed members red, green and blue of - class RGB24 from static const int's to enumerators in order to - avoid a link-time error with xlC compiler. - * blitz/tinyvec.h: Changed P_numtype to T_numtype inside class - definition consistently. - * blitz/array-impl.h: Changed template parameter name T_numtype2 - to P_numtype2 in member function template declarataions for - consistency with definitions and to avoid any confusion with - typedef T_numtype. - * blitz/array/slicing.cc: Changed T_numtype to P_numtype when used - outside the argument list or body of a member function definition - (i.e., outside the class scope). Inside the class scope, we can - use the typedef T_numtype. The IBM xlC compiler gets confused if - P_numtype is used as a template parameter name in a member - function declaration and then T_numtype is used as the parameter - name in the member function definition. Fixed usage to be more - consistent. - * blitz/array/methods.cc: Likewise. - * blitz/array/ops.cc: Likewise. - * blitz/tinyvec.cc: Likewise. - * blitz/array/et.h: Added #include of blitz/array/misc.cc if not - using new expression templates. - * blitz/array-impl.h: Explicitly specify second template argument - for ListInitializationSwitch, rather than relying on the default - value. This eliminates a compilation problem using the xlC - compiler. Also removed #include of misc.cc, which is now handled - in blitz/array/et.h. 
- * blitz/tinymat.h: Likewise. - * blitz/tinyvec.h: Likewise. - -2002-05-27 Julian Cummings - * blitz/array-impl.h: Removed use of this-> as means of accessing - members of templated base class. Instead provided using - declarations for these members within the derived class - definitions to bring them into the scope of the derived class. - * blitz/array/slicing.cc: Likewise. - * blitz/array/eval.cc: Likewise. - * blitz/array/methods.cc: Likewise. - * blitz/array/storage.h: Likewise. - * blitz/array/stencil-et.h: Likewise. - * blitz/array/iter.h: Likewise. - * blitz/matrix.cc: Likewise. - * blitz/vector.cc: Likewise. - * blitz/memblock.h: Likewise. Changed this->addReference() to - MemoryBlock::addReference(). Use base class name as scoping - qualifier rather than "this" pointer. - * blitz/matrix.h: Likewise. - * blitz/vector.h: Likewise. - -2002-05-24 Patrick Guio - * INSTALL: Added instruction for cvs repository installation in case a - problem should occur with the build system. - * configure.in: Removed some unecessary check - * configure: Regenerated with autoconf 2.53, automake 1.6.1, libtool 1.4.2 - * every Makefile.in: Likewise - -2002-05-23 Patrick Guio - * configure.in: Added support for ranlib on SGI that failed and a few - other tests. The variable RANLIB is set to "ar ts" for SGI as well as for - DECcxx. - * blitz/Makefile.am: Added tinymatio.cc in the blitz_HEADERS variable - * configure: Regenerated with autoconf 2.53, automake 1.6.1, libtool 1.4.2 - * every Makefile.in: Likewise - -2002-05-22 Julian Cummings - * blitz/array/eval.cc: Fixed bug in - Array::evaluateWithIndexTraversal1() by removing cast of second - argument to T_numtype in call to T_update::update(). This cast - will occur automatically when the update operation is performed. - This fixes a problem reported by Masahiro Tatsumi - in which one could not assign a double to an - Array of TinyVectors of double without explicitly constructing a - TinyVector of doubles on the right-hand side. 
Also fixed an - unused variable warning emanating from the function - Array::evaluateWithFastTraversal() by moving the definition of - local variable "last" so that it is only seen if it is used. - * blitz/tinymatio.cc: New file (I/O operations for blitz TinyMatrix) - * blitz/tinymat.h: Added #include of - -2002-05-10 Patrick Guio - * blitz/randref.h: private constructor for template class _bz_VecExprRandom - did not have an explicit initialiser for the private member random_, added - it. - Compaq C++ V6.5-014 for Compaq Tru64 UNIX V5.1A (Rev. 1885) complained - about this. - * blitz/array/map.h: Likewise for template class ArrayIndexMapping - -2002-05-08 Patrick Guio - * configure.in: Regenerated by running the command autoupdate from - autoconf 2.53 - * configure: Regenerated by running the command autoreconf --force --install - with autoconf 2.53, automake 1.6.1, libtool 1.4.2 - * COPYING: Likewise - * Makefile.in: Likewise - * benchmarks/Makefile.in: Likewise - * bin/Makefile.in: Likewise - * blitz/Makefile.in: Likewise - * blitz/array/Makefile.in: Likewise - * blitz/generate/Makefile.in: Likewise - * blitz/meta/Makefile.in: Likewise - * compiler/Makefile.in: Likewise - * examples/Makefile.in: Likewise - * lib/Makefile.in: Likewise - * manual/Makefile.in: Likewise - * manual/examples/Makefile.in: Likewise - * manual/stencils/Makefile.in: Likewise - * random/Makefile.in: Likewise - * src/Makefile.in: Likewise - * testsuite/Makefile.in: Likewise - * aclocal.m4: Likewise - * install-sh: Likewise - * missing: Likewise - * mkinstalldirs: Likewise - * depcomp: New file generated by running autoreconf --force --install - with autoconf 2.53, automake 1.6.1, libtool 1.4.2 - * config.guess: imported from automake 1.6.1 - * config.sub: Likewise - * benchmarks/Makefile.am: Removed target 'compile' that caused trouble when - 'make dist' - * benchmarks/Makefile.in: Regenerated with automake - * examples/Makefile.am: Removed targets 'compile' and 'all' that caused - trouble
when 'make dist' - * examples/Makefile.in: Regenerated with automake - -2002-04-17 Patrick Guio - * blitz/array/stencil-et.h: replaced T_numtype with P_numtype in - every macros definitions. Fixed a compilation problem with aCC/HP - in the stencils examples (stencils2.cpp, stencil3.cpp, stencilet.cpp) - in the directory examples. - Suggested by Robert W. Techentin - -2002-03-21 Patrick Guio - * examples/io.cpp: Added #ifdef BZ_HAVE_STD for #include - * examples/polymorph.cpp: corrected polymorph container declaration - Array by Array - * examples/prettyprint.cpp: Removed unecessary #include - * examples/rand2.cpp: Likewise - * examples/rand2.cpp: Added return 0; to function int main2() - * examples/tiny3.cpp: fixed typo #include into - #include - * examples/Makefile.{am,in}: Removed targets tiny2.cpp (no int main()), - tiny3.cpp (no int main()) and profile.cpp (requires tau profiling) - * array/stencil-et.h: replaced iter_ by this->iter_ in derived template - classes of StencilExpr template class - -2002-03-07 Patrick Guio - * configure.in: f90 optimisation flag set to O3 for platform *hp-hpux* - * blitz/array/io.cc: cosmetic change - * blitz/traversal.h: moved - template - _bz_typename TraversalOrderCollection::T_set - TraversalOrderCollection::traversals_; - after the declaration of - template class TraversalOrderCollection - * TODO: New file - * blitz/array/expr.h: - line 124 - #ifdef BZ_NEW_EXPRESSION_TEMPLATES replaced by - #if defined(BZ_NEW_EXPRESSION_TEMPLATES) && ! 
defined(__MWERKS__) - line 134 added - #if !defined(__MWERKS__) - #endif - as suggested by Xavier Warin - for Metrowerks code warrior compiler - * random/mt.h: fixed use of STL iterator as suggested by - Julian Cummings and - Osamu Ogasawara - * blitz/config-mwerks.h: New file, configuration file for Metrowerks code - warrior compiler proposed by Xavier Warin - * INSTALL: updated with the new make targets - * Makefile.am (in each directory): fixed a bug for the target dist - * configure.in: Added support for Intel icf (Fortran 90 compiler) when - target is *linux* and CXX is Intel icc - * benchmarks/Makefile: Added work.pc* in clean target, these files are - generated when running Intel Fortran 90 compiler ifc - * benchmarks/plot_benchmarks.m.in: New file to generate m-file - benchmarks/plot_benchmarks.m to plot benchmarks results - * configure.in: Added benchmarks/plot_benchmarks.m to be generated from - benchmarks/plot_benchmarks.m.in - * configure.in: output correctly the available DEC (actually now Compaq:-) - compilers - * INSTALL: Likewise - -2002-03-06 Patrick Guio - * ChangeLog: New file. - * testsuite/mattias-lindstroem-1: Removed (executable) - * configure.in: updated with autoupdate (GNU autoconf) 2.49c, added support - for HP-UX aCC and Intel icc. - * aclocal.m4: renamed to acinclude.m4. 
It contains the *_BZ_* declarations - * acinclude.m4: New file generated by running aclocal (GNU automake) 1.4a - * Makefile.am (in each directory): New files (to generate a Makefile.in with - automake) - * bin/Makefile.in, blitz/Makefile.in, blitz/array/Makefile.in, - * blitz/generate/Makefile.in, blitz/meta/Makefile.in, lib/Makefile.in, - * manual/Makefile.in, manual/examples/Makefile.in, - * manual/stencils/Makefile.in, random/Makefile.in,: New files generated - from Makefile.am generated by running automake (GNU automake) 1.4a - * .cvsignore (in each directory): New files - * missing: New file generated when running the script autoconf -a -c -i - (GNU autoconf) 2.49c - * mkinstalldirs: Likewise - * config.guess: updated from automake 1.4a - * config.sub: Likewise - * GPL: renamed to COPYING according to GNU standard - * AUTHORS, COPYING, NEWS: New files to follow the GNU standard - * benchmarks/*f90.f: renamed to benchmarks/*f90.f90 - * examples/complex.cpp: renamed to examples/complex-test.cpp - * testsuite/complex.cpp: renamed to testsuite/complex-test.cpp due to - interference with #include - * Makefile.in, aclocal.m4, configure, benchmarks/Makefile.in, - bin/Makefile.in, blitz/Makefile.in, blitz/array/Makefile.in, - blitz/generate/Makefile.in, blitz/meta/Makefile.in, compiler/Makefile.in, - demos/Makefile.in, examples/Makefile.in, lib/Makefile.in, - manual/Makefile.in, manual/examples/Makefile.in, - manual/stencils/Makefile.in, random/Makefile.in, src/Makefile.in, - testsuite/Makefile.in: Regenerated files from Makefile.am generated by - running automake (GNU automake) 1.4a - * manual/examples/Makefile, manual/stencils/Makefile: Removed, now generated - by configure - * CHANGELOG: renamed to ChangeLog.1 - * manual/examples/*.out: renamed to manual/examples/*.log due to - interference with automake - * compiler/elabbase.cpp: bar() replaced by this->bar() in zowee() const for - ANSI C++ correctness (14.6.2(3) in the C++ Standard) - * src/globals.cpp: char* 
_blitz_id replaced by char _blitz_id[] - * random/uniform.h: irng_.random() replaced by this->irng_.random() - everywhere for ANSI C++ correctness (14.6.2(3) in the C++ Standard) - * random/discrete-uniform.h: Likewise - * random/normal.h: getUniform() replaced by this->getUniform() - * blitz/array-impl.h: data_ replaced by this->data_ everywhere, - changeToNullBlock() replaced by this->changeToNullBlock(), - class _bz_endTag replaced by struct _bz_endTag { } declaration - * blitz/array/resize.cc: for (d=0; d < N_rank; ++d) replaced by for (int - d=0; d < N_rank; ++d) (for scoping) - * blitz/array/iter.h: - data_ replaced by this->data_ in - template - class ArrayIterator : public ConstArrayIterator {}, - removed struct _bz_endTag { }; declaration - * blitz/array/io.cc: Added typename (_bz_typename) qualifier to the - iterator and const_iterator of Array - * blitz/ops.h: `os' replaced by `str' in the BitwiseNot template - * blitz/vector.h: data_ replaced by this->data_ everywhere - * blitz/array/slicing.cc: Likewise - * blitz/array/eval.cc: Likewise - * blitz/array/methods.cc: data_ replaced by this->data_ everywhere - numReferences() replaced by this->numReferences() - * blitz/vector.cc: data_ replaced by this->data_ everywhere, - numReferences() by this->numReferences() - * blitz/matrix.cc: data_ replaced by this->data_ - * blitz/matrix.h: Likewise everywhere - * blitz/promote.h: typename replaced by _bz_typename - * blitz/array/reduce.cc: TinyVector replaced by - TinyVector - * blitz/array/storage.h: in - template - class FortranArray : public GeneralArrayStorage {} and - template - class ColumnMajorArray : public GeneralArrayStorage {} - ordering_, ascendingFlag_, base_ replaced by this->ordering_, - this->ascendingFlag_, this->base_ - noInitializeFlag() replaced by - GeneralArrayStorage::noInitializeFlag() - * blitz/array/where.h: minmax::max(minmax::max(stride1,stride2),stride3) - replaced by - 
stride1>(stride2=(stride2>stride3?stride2:stride3))?stride1:stride2 - * blitz/transversal.cc, blitz/transversal.h: template declaration - template - _bz_typename TraversalOrderCollection::T_set - TraversalOrderCollection::traversals_; - in blitz/transversal.cc moved before template specialisation - template<> - class TraversalOrderCollection<0> {} - in blitz/transversal.h - * blitz/array/fastiter.h: for BZ_HAVE_STD only - #include replaced by #include - ostrstream ostr replaced by ostringstream ostr - * testsuite/matthias-troyer-1.cpp, testsuite/matthias-troyer-2.cpp: - Removed unnecessary includes for iostream.h and complex.h - * examples/cfd.cpp: Added #ifdef BZ_HAVE_STD for #include - * examples/haney.cpp: Likewise for #include - * blitz/vecexpr.h: (re)inserted includes for vecbops, vecuops and vecbfn - in order to compile testsuite/tinyvec.cpp - * benchmarks/ct.cpp: Removed (output of g++ -E) - * benchmarks/acou3db4.cpp: corrected typo in - #include , should be - #include - * benchmarks/acou3d.cpp: Added #include for call to - generateFastTraversalOrder function - * benchmarks/stencil.cpp: Likewise - * blitz/memblock.h: In the constructor - MemoryBlock(size_t length, T_type* _bz_restrict data) - dataBlockAddress_ = data replaced by dataBlockAddress_ = 0 - as it was before. (testsuite/extract does not crash then) - * testsuite/matthias-troyer-2.cpp: changed the statement - A = exp(c * (sqr(i-midpoint) + sqr(j-midpoint) + sqr(k-midpoint))); - by - A = zip( exp(c * (sqr(i-midpoint) + sqr(j-midpoint) + sqr(k-midpoint))), - 0.0, complex()); - Still a problem with the complex stencil, right now the statement - applyStencil(kinEnergy(),A,B); is commented - -2002-02-28 tveldhui - * blitz/memblock.h: Fixed extra semicolon problem with KCC. - -2001-02-22 tveldhui - * manual/blitz02.html: Fixed minor bug in docs. - -2001-02-15 tveldhui - * blitz/array-impl.h: Fixed typo. - * blitz/blitz.h: Fixed problem with BZ_THREADSAFE macros. 
- -2001-02-11 tveldhui - * blitz/array-impl.h: Fixed prototype typos - * blitz/blitz.h: Fixed minor typo. - -2001-02-11 Julian Cummings - * blitz/array/domain.h, blitz/array/slicing.cc: Added StridedDomain class - and more versions of resizeAndPreserve. - -2001-02-04 tveldhui - * blitz/blitz.h blitz/memblock.h: Made memory block reference counting - (optionally) threadsafe when BZ_THREADSAFE is defined. Currently uses - pthread mutex. When compiling with gcc -pthread, _REENTRANT automatically - causes BZ_THREADSAFE to be enabled. - -2001-01-26 tveldhui - * blitz/array/eval.cc blitz/array/methods.cc blitz/Array.h - examples/array.cpp blitz/benchext.cc blitz/range.h blitz/TinyVec.h - blitz/tuning.h: More source code reorganization to reduce compile times. - * blitz/array/stencils.cc: Incorporated 1D stencil fix from Derrick Bass. - * blitz/array/funcs.h, blitz/arrayuops.cc: Changed isnan to blitz_isnan, - to avoid conflicts with implementations that define isnan as a - preprocessor macro. - * random/mt.h: Incorporated changes from Max Domeika for STL - compatibility. - * examples/cast.cpp examples/deriv.cpp examples/erf.cpp examples/fixed.cpp - examples/matmult.cpp examples/outer.cpp examples/pick.cpp - examples/qcd.cpp examples/rangexpr.cpp examples/reduce.cpp - examples/simple.cpp examples/slicing.cpp examples/storage.cpp - examples/tiny.cpp examples/where.cpp: More source code reorganization - to reduce compile times. 
- * blitz/meta/metaprog.h: Updated docs to reflect isnan -> blitz_isnan - change - * blitz/meta/sum.h: Fixed bug found by Masahiro TATSUMI - -2001-01-25 tveldhui - * blitz/array/asexpr.h blitz/array/cartesian.h blitz/array/cgsolve.h - blitz/array/convolve.h blitz/array/domain.h blitz/array/et.h - blitz/array/funcs.h blitz/array/geometry.h blitz/array/indirect.h - blitz/array/multi.h blitz/array/newet-macros.h blitz/array/newet.h - blitz/array/ops.h blitz/array/stencil-et.h blitz/array/stencilops.h - blitz/array/stencils.h blitz/array/storage.h blitz/array/where.h - blitz/array/zip.h blitz/array/complex.cc blitz/array/convolve.cc - blitz/array/cycle.cc blitz/array/io.cc blitz/array/misc.cc - blitz/array/newbops.cc blitz/array/ops.cc blitz/array/reduce.cc - blitz/array/resize.cc blitz/array/slicing.cc blitz/array/stencils.cc - blitz/tinyvec.cc blitz/etbase.h blitz/limits-hack.h blitz/mathf2.h - blitz/minmax.h blitz/traversal.cc: Ensured that source files have cvs logs. - -2001-01-24 tveldhui - * Updated copyright date in headers. - * blitz/array/methods.cc examples/array.cpp examples/cast.cpp - examples/deriv.cpp examples/fixed.cpp examples/simple.cpp: Widespread - changes to reduce compile time. For backwards compatibility, - #include enables BZ_GANG_INCLUDE mode which includes - all array and vector functionality (about 120000 lines of code). - #include includes a minimal subset of Array functionality; - other features must be included explicitly. - * blitz/array/eval.cc blitz/Array.h blitz/blitz.h blitz/TinyVec.h - blitz/vecexpr.h blitz/vector.h: Reorganized #include orders to avoid - including the huge Vector e.t. implementation when using Array. - -2000-06-19 tveldhui - * manual/examples/storage.cpp: Initial source check-in; added files not - usually released in the distribution. - * manual/blitz02.html: Likewise - * blitz/generate/genarrbops.cpp blitz/generate/genvecbops.cpp: Initial - source check-in; added files not usually released in thedistribution. 
- * blitz/vecuops.cc: Initial source check-in; added files not usually - released in the distribution. - -1999-01-25 allan@stokes.ca (Allan Stokes) www.stokes.ca - * random/mt.h: adapted to STL-like idiom - -1998-12-06 tveldhui - * blitz/memblock.h: Prior to adding UnownedMemoryBlock. - -1998-06-15 tveldhui - * blitz/memblock.h: When a memory block is created from an existing block - of data, add an additional reference count so that makeUnique() will - create a copy of the data. - -1998-04-03 tveldhui - * compiler/bzconfig: Added command-line options, noninteractive mode - -1998-03-14 tveldhui - * 0.2-alpha-05 - -1998-02-25 tveldhui - * blitz/array/eval.cc: Initial revision. - -1997-08-18 tveldhui - * Just prior to implementing fastRead() optimization for array - expression evaluation. - -1997-08-15 tveldhui - * Just prior to loop-collapse change - -1997-07-16 tveldhui - * Alpha release 0.2 (Arrays) - -1997-07-03 tveldhui - * examples/numinquire.cpp: Initial revision - -1997-02-28 tveldhui - * examples/qcd.cpp blitz/rand-tt800.h: Initial revision - -1997-01-24 tveldhui - * blitz/bench.h: Prior to rewrite of Bench class; in this version, Bench - contain each benchmark implementation. - -1997-01-23 tveldhui - * blitz/vecwhere.h: Initial revision - -1997-01-13 tveldhui - * blitz/numtrait.h blitz/vecglobs.h blitz/vecpick.h blitz/vecpickiter.h - blitz/zero.h: Initial revision. - -1996-11-11 tveldhui - * blitz/memblock.h blitz/range.h blitz/tuning.h: Initial revision. - -1996-11-01 tveldhui - * compiler/bzconfig: Added type promotion check; tidied up. - -1996-10-31 tveldhui - * blitz/matrix.h blitz/vector.h: Did away with multiple template parameters. - Only numeric type and structure parameters now. - -1996-04-16 todd - * compiler/bzconfig: Initial revision. All important features checked. 
- diff --git a/ChangeLog.2 b/ChangeLog.2 deleted file mode 100644 index 2245874c..00000000 --- a/ChangeLog.2 +++ /dev/null @@ -1,431 +0,0 @@ -2011-05-23 Patrik Jonsson - - * configure.ac, blitz/tinyvec.h, blitz/meta/vecassign.h: Created - configure option --enable-simd-width=, which sets the loop - size and alignment to facilitate compiler vectorization. - -2011-05-20 Patrik Jonsson - - * configure.ac, blitz/numtrait.h, blitz/tinyvec.cc, - blitz/tinyvec.h, blitz/array/methods.cc, blitz/array/reduce.h, - blitz/meta/vecassign.h, m4/ac_cxx_alignment_directive.m4, - m4/ax_prefix_config_h.m4: Tweaked TinyVector assignment to - facilitate vectorization with icpc. Added an autoconf macro which - tests for the gcc and icpc ways of specifying structure - alignment, and used it to set alignment for the TinyVector data_ - member to 16, which is required by SSE. - -2011-05-10 Patrik Jonsson - - * configure.ac, blitz/blitz.h, blitz/memblock.h, - testsuite/Makefile.am, testsuite/pthread.cpp: Added support for - lock-free, thread-safe reference counting with Intel Threading - Building Blocks atomic types. This is enabled with '--with-tbb'. - If Blitz is configured with --enable-threadsafe, it will now give - an error if the compiler threading support isn't enabled rather - than silently disabling thread support. Added test 'pthread', - enabled if the compiler supports pthreads. (Consequently, this - only works with --disable-cxx-flags-preset, so -DBZ_DEBUG was - hardcoded for the test suite.) It would be good to have tests for - the Windows and OpenMP threads, too. - -2011-05-09 Patrik Jonsson - - * blitz/memblock.h: MemoryBlocks now are created with reference - count =1, so the MemoryBlockReference that creates it does not - have to call addReference. This saves an unnecessary mutex lock. - Added function isThreadsafe that returns true if blitz was - configured with --enable-threadsafe. 
- -2011-05-09 Patrik Jonsson - - * testsuite/64bit.cpp: Updated 64-bit test so it only tries the - upper end of the range, in the interest of time. - -2011-03-26 Patrick Guio - - * bin/update-cl.sh: - Force time to be interpreted in GMT instead of local time to - avoid discrepancies when running the script from different time - zones. - -2011-03-25 Julian Cummings - - * src/globals.cpp: Use BZ_PACKAGE_STRING macro defined by configure - script to obtain library name and version number. - -2011-03-25 Julian Cummings - - * blitz/tiny.h: Update header file copyright info. This header file - is apparently still needed for template metaprogramming loops in - blitz/meta. - -2011-03-25 Julian Cummings - - * configure.ac: Update name of email list for blitz bug reports. - -2011-03-25 Julian Cummings - - * blitz/applics.h, blitz/array-impl.h, blitz/array-old.h, - blitz/array.h, blitz/bench.cc, blitz/bench.h, blitz/benchext.cc, - blitz/benchext.h, blitz/blitz.h, blitz/bzconfig.h, - blitz/bzdebug.h, blitz/compiler.h, blitz/etbase.h, - blitz/extremum.h, blitz/funcs.h, blitz/indexexpr.h, - blitz/limits-hack.h, blitz/listinit.h, blitz/matdiag.h, - blitz/matexpr.h, blitz/matgen.h, blitz/mathf2.h, blitz/matltri.h, - blitz/matref.h, blitz/matrix.cc, blitz/matrix.h, blitz/matsymm.h, - blitz/mattoep.h, blitz/matutri.h, blitz/memblock.cc, - blitz/memblock.h, blitz/minmax.h, blitz/mstruct.h, - blitz/numinquire.h, blitz/numtrait.h, blitz/ops.h, - blitz/prettyprint.h, blitz/promote.h, blitz/rand-dunif.h, - blitz/rand-mt.h, blitz/rand-normal.h, blitz/rand-tt800.h, - blitz/rand-uniform.h, blitz/random.h, blitz/randref.h, - blitz/range.h, blitz/reduce.h, blitz/shapecheck.h, blitz/tau.h, - blitz/timer.h, blitz/tinymat.h, blitz/tinymatexpr.h, - blitz/tinymatio.cc, blitz/tinyvec-et.h, blitz/tinyvec.cc, - blitz/tinyvec.h, blitz/tinyvecio.cc, blitz/tinyveciter.h, - blitz/traversal.cc, blitz/traversal.h, blitz/tuning.h, - blitz/tvcross.h, blitz/tvecglobs.h, blitz/update.h, - blitz/vecaccum.cc, 
blitz/vecall.cc, blitz/vecany.cc, - blitz/veccount.cc, blitz/vecdelta.cc, blitz/vecdot.cc, - blitz/vecexpr.h, blitz/vecexprwrap.h, blitz/vecglobs.cc, - blitz/vecglobs.h, blitz/vecio.cc, blitz/veciter.h, - blitz/vecmax.cc, blitz/vecmin.cc, blitz/vecnorm.cc, - blitz/vecnorm1.cc, blitz/vecpick.cc, blitz/vecpick.h, - blitz/vecpickio.cc, blitz/vecpickiter.h, blitz/vecproduct.cc, - blitz/vecsum.cc, blitz/vector-et.h, blitz/vector.cc, - blitz/vector.h, blitz/vecwhere.h, blitz/zero.cc, blitz/zero.h, - blitz/array/asexpr.h, blitz/array/cartesian.h, - blitz/array/cgsolve.h, blitz/array/complex.cc, - blitz/array/convolve.cc, blitz/array/convolve.h, - blitz/array/cycle.cc, blitz/array/domain.h, blitz/array/et.h, - blitz/array/eval.cc, blitz/array/expr.h, blitz/array/fastiter.h, - blitz/array/funcs.h, blitz/array/functorExpr.h, - blitz/array/geometry.h, blitz/array/indirect.h, - blitz/array/interlace.cc, blitz/array/io.cc, blitz/array/iter.h, - blitz/array/map.h, blitz/array/methods.cc, blitz/array/misc.cc, - blitz/array/multi.h, blitz/array/newet-macros.h, - blitz/array/newet.h, blitz/array/ops.cc, blitz/array/ops.h, - blitz/array/reduce.cc, blitz/array/reduce.h, - blitz/array/resize.cc, blitz/array/shape.h, blitz/array/slice.h, - blitz/array/slicing.cc, blitz/array/stencil-et.h, - blitz/array/stencilops.h, blitz/array/stencils.cc, - blitz/array/stencils.h, blitz/array/storage.h, - blitz/array/where.h, blitz/array/zip.h, blitz/meta/dot.h, - blitz/meta/matassign.h, blitz/meta/matmat.h, blitz/meta/matvec.h, - blitz/meta/metaprog.h, blitz/meta/product.h, blitz/meta/sum.h, - blitz/meta/vecassign.h, random/F.h, random/beta.h, - random/chisquare.h, random/default.h, random/discrete-uniform.h, - random/exponential.h, random/gamma.h, random/mt.h, - random/mtparam.cc, random/normal.h, random/uniform.h: Updating - file header information to include current copyright info. Ensure - that each file has a CVS Id string and that each header file has - a C++ tag for Emacs. 
- -2011-03-04 Julian Cummings - - * LICENSE, COPYING, COPYING.LESSER: Update license file to - reference LGPL v3. Version 3 of GPL is in COPYING and LGPL is in - COPYING.LESSER file. - -2011-02-23 Julian Cummings - - * LICENSE: Fixed typo in LICENSE file. (testing cvs syncmail) - -2011-02-18 Julian Cummings - - * configure.ac, doc/stamp-vti, doc/version.texi: Update version - number from 0.9 to 0.10 in preparation for new Blitz maintenance - release. - -2011-02-17 Patrick Guio - - * Makefile.am: - Replaced similar (check-*) targets recipes by a multiple target - recipe. - -2011-02-14 Patrick Guio - - * bin/update-cl.sh: - Small utility script to update ChangeLog by calling `cvs2cl' if - found. Currently the date filter everything strictly after - 2009-12-31. - -2011-02-14 Patrick Guio - - * ChangeLog.1: - Moved ChangeLog up to 2009-12-31 into ChangeLog.1. - -2011-02-14 Patrick Guio - - * ChangeLog.0, ChangeLog.1: - Moved ChangeLog.1 to ChangeLog.0. - -2011-02-14 Patrick Guio - - * config/mdate-sh: - Removed as it is installed by `autoreconf -vif'. - -2011-02-14 Patrick Guio - - * benchmarks/Makefile.am: - Added cfd as part of the OTHER_BENCH targets. - -2011-02-14 Patrick Guio - - * benchmarks/cfd.cpp: - Added missing tinyVector expression template header. - -2011-02-13 Patrick Guio - - * configure.ac: - Added report message for 64-bit dimensions. Fixed - enable-64-bit-variable name typo. - -2010-12-08 Theodore Papadopoulo - - * ChangeLog, manual/makeHTMLIndex.cpp: Convert to modern C++. - -2010-12-08 Theodore Papadopoulo - - * ChangeLog, doc/doxygen/Doxyfile.in: Use pdflatex. - -2010-07-06 Patrick Guio - - * examples/erf.cpp, examples/qcd.cpp: - Fixed header declaration to ISO standard C++ and added missing - using namespace declaration. 
- -2010-05-26 Theodore Papadopoulo - - * ChangeLog, configure.ac: Correct the FULLY64BIT define - -2010-03-14 Patrik Jonsson - - * blitz/array/methods.cc, testsuite/Makefile.am, - testsuite/weakref.cpp: Fixed a bug in the weakReference method - when used on arrays with non-simple storage like those resulting - from slicing higher-dimension arrays. Added a - testsuite/weakref.cpp to check that this works. - -2010-02-24 Patrik Jonsson - - * blitz/reduce.h: Made sure all classes had reset() functions that - are declared const. Thanks to George S. Hugh for finding that. - -2010-02-09 Patrik Jonsson - - * configure.ac, benchmarks/stencil.cpp, blitz/array-impl.h, - blitz/bench.cc, blitz/bench.h, blitz/benchext.cc, - blitz/benchext.h, blitz/etbase.h, blitz/funcs.h, - blitz/indexexpr.h, blitz/reduce.h, blitz/array/asexpr.h, - blitz/array/expr.h, blitz/array/fastiter.h, - blitz/array/functorExpr.h, blitz/array/map.h, - blitz/array/newet-macros.h, blitz/array/reduce.h, - blitz/array/slice.h, blitz/array/stencil-et.h, - blitz/array/stencilops.h, blitz/array/where.h, src/globals.cpp, - testsuite/Makefile.am, testsuite/arrayresize.cpp, - testsuite/expression-slicing.cpp, testsuite/safeToReturn.cpp, - testsuite/stencil-et.cpp: Merged the changes from the stencil-et - branch to the trunk. - -2010-02-08 Patrik Jonsson - - * configure.ac, blitz/array-impl.h, blitz/blitz.h, - blitz/indexexpr.h, blitz/memblock.cc, blitz/memblock.h, - blitz/range.h, blitz/tvecglobs.h, blitz/array/cartesian.h, - blitz/array/domain.h, blitz/array/eval.cc, - blitz/array/fastiter.h, blitz/array/functorExpr.h, - blitz/array/iter.h, blitz/array/methods.cc, - blitz/array/slicing.cc, blitz/array/stencil-et.h, - blitz/array/where.h, blitz/meta/dot.h, blitz/meta/product.h, - blitz/meta/sum.h, config/mdate-sh, testsuite/64bit.cpp, - testsuite/Makefile.am: Merged compat-64bit branch to the trunk. 
- -2010-02-08 Patrik Jonsson - - * configure.ac, blitz/blitz.h, testsuite/64bit.cpp, - testsuite/Makefile.am: Added 64-bit testcase (takes a very long - time to run) and prepended BZ_ to the FULLY64BIT define. - -2010-02-05 Patrik Jonsson - - * blitz/blitz.h: Changed indexType to be ptrdiff_t instead of - size_t for 64-bit dimensions. - -2010-02-05 Patrik Jonsson - - * configure.ac, blitz/array-impl.h, blitz/blitz.h, - blitz/indexexpr.h, blitz/memblock.cc, blitz/memblock.h, - blitz/range.h, blitz/array/cartesian.h, blitz/array/domain.h, - blitz/array/eval.cc, blitz/array/fastiter.h, - blitz/array/functorExpr.h, blitz/array/iter.h, - blitz/array/methods.cc, blitz/array/slicing.cc, - blitz/array/stencil-et.h, blitz/array/where.h: Changed size_t and - ptrdiff_t to sizeType and diffType, defined in blitz.h. Also - added indexType, to be used for array indexing if dimensions > - 2^31 are needed, but it needs to be tracked down and changed in - the code. - -2010-01-21 Patrik Jonsson - - * blitz/array/stencil-et.h: Made _bz_makeRange(nilArraySection) - inline. - -2010-01-21 Patrik Jonsson - - * src/globals.cpp: Reordered the include files so blitz/array.h is - first, otherwise there are errors. - -2010-01-21 Patrik Jonsson - - * blitz/indexexpr.h, blitz/array/expr.h, blitz/array/fastiter.h, - blitz/array/functorExpr.h, blitz/array/map.h, - blitz/array/reduce.h, blitz/array/slice.h, - blitz/array/stencil-et.h, blitz/array/where.h, - testsuite/Makefile.am, testsuite/expression-slicing.cpp: Added - ability to slice expressions. This required adding a number of - operator()s to _bz_ArrayExpr, since it is the 'top level' - expression class, and the remaining expression classes only need - the 11-argument operator() template. All expression classes also - have a SliceInfo class that gives the return type of a slice, - completely analogously to the SliceInfo class for Arrays. 
It is - not possible to slice reductions, index remappings or index - placeholder expressions, which require nontrivial operations on - the slice arguments. Added a testcase expression-slicing. - -2010-01-19 Patrik Jonsson - - * blitz/indexexpr.h, blitz/reduce.h, blitz/array/expr.h, - blitz/array/fastiter.h, blitz/array/functorExpr.h, - blitz/array/map.h, blitz/array/reduce.h, - blitz/array/stencil-et.h, blitz/array/where.h: Declared access - member functions of the expression template classes (like - operator*(), operator(), fastRead, shift, and shapeCheck as - const. This required making 'conceptually const' members like the - stencil class iterators and the reduction variables mutable. This - change is necessary to not run into trouble passing expressions - to functions, as you can't pass a non-const reference to a - temporary. - -2010-01-19 Patrik Jonsson - - * testsuite/stencil-et.cpp: Added tests of reductions of stencil - results. - -2010-01-19 Patrik Jonsson - - * testsuite/arrayresize.cpp: Added note about expected valgrind - error in this test. - -2010-01-19 Patrik Jonsson - - * configure.ac, testsuite/Makefile.am: Added support for Mike - Teschs valgrind tests if valgrind is in the path. - -2010-01-13 Patrik Jonsson - - * blitz/bench.cc, blitz/bench.h, blitz/benchext.cc, - blitz/benchext.h: Added a method savePylabGraph to the Benchmark - class, which is called whenever saveMatlabGraph is called. - -2010-01-13 Patrik Jonsson - - * benchmarks/stencil.cpp: Added versions of the stencil benchmark - that test the new stencil operator expressions. - -2010-01-12 Patrik Jonsson - - * blitz/array/fastiter.h, testsuite/safeToReturn.cpp: Added a - wrapper safeToReturn() that can be used to safely return - expressions from functions even if they reference arrays that - will go out of scope. There's also a testcase testing it. 
- -2010-01-12 Patrik Jonsson - - * blitz/array-impl.h, blitz/array/expr.h, blitz/array/fastiter.h, - blitz/array/map.h, blitz/array/stencil-et.h, config/mdate-sh, - testsuite/stencil-et.cpp: Updated the ArrayIndexMapping class so - it can be passed as an operand to stencils. To accomplish this it - was made to contain a FastArrayIterator, instead of an Array, so - that the moveTo and shift_ members work. The Array operator() - that take index placeholders were modified to work with new - ArrayIndexMapping. Also had to make the moveTo and operator() - members of the classes in expr.h be templated on number of - dimensions. Added testcases of applying stencils to expressions - involving remapped dimensions in stencil-et.cpp - -2010-01-09 Patrik Jonsson - - * testsuite/Makefile.am, testsuite/stencil-et.cpp: Added testcase - for stencil expressions. - -2010-01-09 Patrik Jonsson - - * blitz/array/stencil-et.h: Added comment explaining what - operations don't work with stencil expressions. - -2010-01-09 Patrik Jonsson - - * blitz/array/expr.h, blitz/array/functorExpr.h: Added versions of - _bz_offsetData taking dimension arguments. - -2010-01-08 Patrik Jonsson - - * blitz/array/stencil-et.h: Added support for BZ_ET_STENCIL2. Added - shift methods to ET classes. Improved code implementing the - stencil calls for expressions and arrays to be more concise, - though the requirement to have exact matches for all calls using - Arrays necessitates a lot of code duplication. Result type - declarations of BZ_ET_STENCIL2 needs work. - -2010-01-08 Patrik Jonsson - - * blitz/array/fastiter.h: Added domain() and _bz_offsetData() - methods to FastArrayIteratorBase. - -2010-01-08 Patrik Jonsson - - * blitz/array/stencilops.h: Updated BZ_DECLARE_STENCIL_OPERATOR2/3 - to not require all types be the same, as this makes them useless - for expressions. - -2010-01-08 Patrik Jonsson - - * blitz/etbase.h: Added wrap() methods to explicitly convert ET - objects to ETBase. 
This is needed for the stencil operator - functions to match correctly. - -2010-01-07 Patrik Jonsson - - * blitz/indexexpr.h, blitz/array/expr.h, blitz/array/functorExpr.h, - blitz/array/map.h, blitz/array/reduce.h, blitz/array/where.h: - Added methods and typedefs necessary to support stencil - expressions. Code now passes all test cases. - -2010-01-07 Patrik Jonsson - - * blitz/array/stencil-et.h: Big cleanup. Removed the duplicate - classes name_et and name_etExpr, all expressions are now handled - by one class called name_et, derived from _bz_StencilExpr. - Improved prettyPrint. Made typedefs more consistent across - classes. - -2010-01-07 Patrik Jonsson - - * blitz/array/fastiter.h: Fixed a bug where operator= is the base - class was hidden. - -2010-01-06 Patrik Jonsson - - * blitz/funcs.h, blitz/indexexpr.h, blitz/array/asexpr.h, - blitz/array/expr.h, blitz/array/fastiter.h, blitz/array/map.h, - blitz/array/newet-macros.h, blitz/array/reduce.h, - blitz/array/stencil-et.h: Updated stencil operators in - stencil-et.h so they work with expressions, instead of just - arrays, and only applies themselves to the interior domain to - avoid overrunning the bounds. This necessitated adding some - methods to the classes in expr.h, and cloning FastArrayIterator - to FastArrayCopyIterator which keeps a copy of the array it - iterates over. stencil-et.h needs cleaning up. - diff --git a/INSTALL b/INSTALL deleted file mode 100644 index 8f5bf2e6..00000000 --- a/INSTALL +++ /dev/null @@ -1,44 +0,0 @@ -Installation procedure for Blitz++ - - -1. Installation - -1.1 From tarfile - -Unpack blitz-VERSION.tar.gz in the directory of your choice. The file -blitz-VERSION.tar.gz file will unpack into a subdirectory blitz-VERSION. - -Blitz++ now uses a standard CMake build workflow. For a typical system, -simply running "cmake ." with no options is sufficient. -CMake will explore your system to discover the available compilers and -appropriate options to use. 
To select a different C++ compiler than the -one discovered, give the option -DCMAKE_CXX_COMPILER=. -The standard configure option -DCMAKE_INSTALL_PREFIX= -may be used to select the installation directory for Blitz++. - -Once you have generated build files with CMake, do: - make lib - -This is a minimal build of just the Blitz++ library. You can also do: - - make check-testsuite Build and check the testsuite - make check-examples Build and check the examples - make check-benchmarks Build and check the benchmarks (long) - make install Install Blitz++ - make info Build Blitz++ texinfo documentation - make ps Build Blitz++ ps documentation - make pdf Build Blitz++ pdf documentation - make html Build Blitz++ html documentation - make install-info Install Blitz++ texinfo documentation - make install-ps Install Blitz++ ps documentation - make install-pdf Install Blitz++ pdf documentation - make install-html Install Blitz++ html documentation - -3. Getting started - -There are example programs in the "examples" directory. -The original Blitz++ documentation is found in the "manual" directory. -Point your browser at manual/index.html, or do "gv manual/blitz.ps". -However, this form of the documentation is no longer being actively -maintained. More up-to-date documentation is kept in the "doc" directory. - diff --git a/NEWS b/NEWS deleted file mode 100644 index 1f6fa9c9..00000000 --- a/NEWS +++ /dev/null @@ -1,13 +0,0 @@ - -** Blitz-0.10 - -Released June 27, 2012 - -This is an AS-IS snapshot release of the current state of the blitz mercurial -source code development repository. Besides numerous bug fixes since the -Blitz-0.9 release back in 2005, development work has been done in an effort -to improve performance, 64-bit compatibility, and thread safety, and to take -advantage of SIMD instructions by utilizing careful data alignment. Most of -these upgrades are not well documented as of yet, and are only apparent via -some new configure script options. 
- diff --git a/README-VS.NET.txt b/README-VS.NET.txt deleted file mode 100644 index 489a51b7..00000000 --- a/README-VS.NET.txt +++ /dev/null @@ -1,2 +0,0 @@ -The Blitz configuration file and Microsoft Visual Studio solution files -can be generated with CMake. diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 78f0c2e2..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,42 +0,0 @@ -platform: - - x86 - - x64 - -init: - - ps: | - If ($env:platform -Match "x86") { - $env:CMAKE_GEN_PLAT="Win32" - $env:PYTHON_ROOT="C:\Python27" - $env:Boost_ROOT="C:\Libraries\boost_1_69_0" - } Else { - $env:CMAKE_GEN_PLAT="x64" - $env:PYTHON_ROOT="C:\Python27-x64" - $env:Boost_ROOT="C:\Libraries\boost_1_69_0" - } - - cmd: C:/"Program Files (x86)"/"Microsoft Visual Studio 12.0"/VC/vcvarsall.bat - -install: - #- choco upgrade cmake - #- set path=C:\Program Files\CMake\bin;%path% - -before_build: - - del "C:\Program Files (x86)\MSBuild\14.0\Microsoft.Common.targets\ImportAfter\Xamarin.Common.targets" - -build_script: - - md build - - cd build - - cmake .. -DCMAKE_GENERATOR_PLATFORM="%CMAKE_GEN_PLAT%" - - - cmake --build . --config Debug - - cmake --build . --config Release - - - ctest -C Debug --output-on-failure - - ctest -C Release --output-on-failure - - - cmake --build . --target install - - - cmake .. -DCMAKE_GENERATOR_PLATFORM="%CMAKE_GEN_PLAT%" -DENABLE_SERIALISATION=1 - - cmake --build . --config Debug - - cmake --build . --config Release - - cmake --build . 
--target install - diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt deleted file mode 100644 index 329b378c..00000000 --- a/benchmarks/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -#set(OTHER_BENCH tinydaxpy hao-he iter) -set(OTHER_BENCH tinydaxpy iter) - -option(FORTRAN_BENCHMARKS "Enable Fortran benchmarks" OFF) -if (FORTRAN_BENCHMARKS) - enable_language(Fortran OPTIONAL) - set(daxpy_SOURCES daxpy.cpp fdaxpy.f fidaxpy.f) - set(stencil_SOURCES stencil.cpp stencilf.f stencilf2.f) - set(acoustic_SOURCES acoustic.cpp acousticf.f acousticf2.f) - set(acou3d_SOURCES acou3db1.cpp acou3db2.cpp acou3db3.cpp acou3db4.cpp \ acou3d.cpp acou3df.f acou3df2.f ) - set(LOOP_KERNELS loop1 loop2 loop3 loop5 loop6 loop8 loop9 loop10 loop11 - loop12 loop13 loop14 loop15 loop16 loop17 loop18 loop19 loop21 loop22 - loop23 loop24 loop25 loop36 loop100 floop1 floop2 floop3 floop5 floop6 floop8 floop9 floop10 floop11 - floop12 floop13 floop14 floop15 floop16 floop17 floop18 floop19 floop21 floop22 - floop23 floop24 floop25 floop36) - set(OTHER_BENCH ${OTHER_BENCH} arrdaxpy haney qcd stencil cfd) - if (CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(daxpy_SOURCES ${daxpy_SOURCES} daxpyf90.f90) - set(stencil_SOURCES ${stencil_SOURCES} stencilf90.f90) - set(acoustic_SOURCES ${acoustic_SOURCES} acousticf90.f90 acousticf902.f90) - set(acou3d_SOURCES ${acou3d_SOURCES} acou3df90.f90 acou3df902.f90) - endif() -endif() - -add_custom_target(benchmark) - -TESTS(benchmark ${LOOP_KERNELS} ${OTHER_BENCH}) - -add_custom_target(check-benchmarks - DEPENDS blitz benchmark - COMMAND ${CMAKE_BUILD_TOOL} test) - -set(COMPILE_TIME_BENCHMARKS ctime1 ctime2 ctime3 ctime4 ctime5 ctime1v ctime2v ctime3v ctime4v ctime5v) diff --git a/benchmarks/acou3d.cpp b/benchmarks/acou3d.cpp deleted file mode 100644 index 2cb251b8..00000000 --- a/benchmarks/acou3d.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#define BZ_DISABLE_RESTRICT - -#include -#include -#include - -#ifdef BZ_HAVE_STD - #include -#else - #include 
-#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define acoustic3d_f90 acoustic3d_f90_ - #define acoustic3d_f77 acoustic3d_f77_ - #define acoustic3d_f90tuned acoustic3d_f90tuned_ - #define acoustic3d_f77tuned acoustic3d_f77tuned_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define acoustic3d_f90 acoustic3d_f90__ - #define acoustic3d_f77 acoustic3d_f77__ - #define acoustic3d_f90tuned acoustic3d_f90tuned__ - #define acoustic3d_f77tuned acoustic3d_f77tuned__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define acoustic3d_f90 ACOUSTIC3D_F90 - #define acoustic3d_f77 ACOUSTIC3D_F77 - #define acoustic3d_f90tuned ACOUSTIC3D_F90TUNED - #define acoustic3d_f77tuned ACOUSTIC3D_F77TUNED -#endif - -extern "C" { -void acoustic3d_f90(int& N, int& niters, float& check); -void acoustic3d_f77(int& N, int& niters, float& check); -void acoustic3d_f90tuned(int& N, int& niters, float& check); -void acoustic3d_f77tuned(int& N, int& niters, float& check); -} - -float acoustic3D_BlitzRaw(int N, int niters); -float acoustic3D_BlitzInterlacedCycled(int N, int niters); -float acoustic3D_BlitzCycled(int N, int niters); -float acoustic3D_BlitzStencil(int N, int niters); - - -void output_data(const char* type, const Timer& t, float check, double Gflops) -{ - cout << type << ": " << t.elapsed() - << t.indep_var() << " check = " - << check << " Gflop/" << t.indep_var() << " = " - << (Gflops/t.elapsed()) - << endl << endl; -} - -int main() -{ - Timer timer; - int N = 112; - int niters = 210; // Must be divisible by 3 for tuned Fortran versions - float check; - - cout << "Acoustic 3D Benchmark" << endl << endl; - - double Gflops = (N-2)*(N-2)*(N-2) * 11.0 * niters / 1.0e+9; - - generateFastTraversalOrder(TinyVector(N-2,N-2)); - - timer.start(); - check = acoustic3D_BlitzRaw(N, niters); - timer.stop(); - output_data("Blitz++ (raw)", timer, check, Gflops); - - timer.start(); - check = acoustic3D_BlitzStencil(N, niters); - 
timer.stop(); - output_data("Blitz++ (stencil)", timer, check, Gflops); - -#if 0 - timer.start(); - check = acoustic3D_BlitzInterlaced(N, niters, c); - timer.stop(); - output_data("Blitz++ (interlaced)", timer, check, Gflops); -#endif - - timer.start(); - check = acoustic3D_BlitzCycled(N, niters); - timer.stop(); - output_data("Blitz++ (cycled)", timer, check, Gflops); - - timer.start(); - check = acoustic3D_BlitzInterlacedCycled(N, niters); - timer.stop(); - output_data("Blitz++ (interlaced & cycled)", timer, check, Gflops); - -#ifdef FORTRAN_90 - timer.start(); - acoustic3d_f90(N, niters, check); - timer.stop(); - output_data("Fortran 90", timer, check, Gflops); - - timer.start(); - acoustic3d_f90tuned(N, niters, check); - timer.stop(); - output_data("Fortran 90 (tuned)", timer, check, Gflops); -#endif - - timer.start(); - acoustic3d_f77(N, niters, check); - timer.stop(); - output_data("Fortran 77", timer, check, Gflops); - - timer.start(); - acoustic3d_f77tuned(N, niters, check); - timer.stop(); - output_data("Fortran 77 (tuned)", timer, check, Gflops); - - return 0; -} - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -void snapshot(const Array& P, const Array& c); - -void checkArray(const Array& A, int N); - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N) -{ - // Set the velocity field - c(Range(0,N/2-1), Range::all(), Range::all()) = 0.05; - c(Range(N/2,N-1), Range::all(), Range::all()) = 0.3; - - double Nfp = static_cast(N); - int cavityLeft = static_cast(3*Nfp/7-1); - int cavityRight = static_cast(4*Nfp/7-1); - int cavityFront = static_cast(3*Nfp/7-1); - int cavityBack = static_cast(4*Nfp/7-1); - int cavityTop = static_cast(5*Nfp/7-1); - int cavityBottom = static_cast(6*Nfp/7-1); - - c(Range(cavityTop,cavityBottom),Range(cavityLeft,cavityRight), - Range(cavityFront,cavityBack)) = 0.02; - - int cavityTop2 = static_cast(1*Nfp/7-1); - int cavityBottom2 = static_cast(2*Nfp/7-1); - 
c(Range(cavityTop2,cavityBottom2),Range(cavityLeft,cavityRight), - Range(cavityFront,cavityBack)) = 0.001; - - // Initial pressure distribution - using namespace blitz::tensor; - - float NN = N; - float ci = N/2-1; - float cj = N/2-1; - float ck = N/2-1; - // pow2 is an ET-only function, it's not defined for POD types - float s2 = 64.0 * 9.0 / pow(NN/2.0, 2); - P1 = 0.0; - P2 = exp(-(pow2(i-ci)+pow2(j-cj)+pow2(k-ck)) * s2); - P3 = 0.0; - - checkArray(P2, N); - checkArray(c, N); -} - -void checkArray(const Array& A, int N) -{ - double check = 0.0; - - for (int i=0; i < N; ++i) - for (int j=0; j < N; ++j) - for (int k=0; k < N; ++k) - check += A(i,j,k) * ((i+1)+N*(j+1)+N*N*(k+1)); - - cout << "Array check: " << check << endl; -} - -void snapshot(const Array& P, const Array& c) -{ - static int count = 0, snapshotNum = 0; - if (++count < 5) - return; - - count = 0; - ++snapshotNum; - char filename[128]; - sprintf(filename, "snapshot%03d.m", snapshotNum); - - ofstream ofs(filename); - int N = P.length(firstDim); - - int k = N/2; - float Pmin = -0.2; - float PScale = 1.0/0.4; - float VScale = 0.5; - - ofs << "P" << snapshotNum << " = [ "; - for (int i=0; i < N; ++i) - { - for (int j=0; j < N; ++j) - { - float value = (P(i,j,k)-Pmin)*PScale + c(i,j,k)*VScale; - int r = static_cast(value * 4096); - ofs << r << " "; - } - if (i < N-1) - ofs << ";" << endl; - } - ofs << "];" << endl; -} - diff --git a/benchmarks/acou3db1.cpp b/benchmarks/acou3db1.cpp deleted file mode 100644 index 28850b12..00000000 --- a/benchmarks/acou3db1.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzRaw(int N, int niters) -{ - // Allocate the arrays as a group. Blitz++ will interlace them in - // memory, improving data locality. 
- - Array P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - P1 = P2; - P2 = P3; - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db2.cpp b/benchmarks/acou3db2.cpp deleted file mode 100644 index a95c2142..00000000 --- a/benchmarks/acou3db2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzInterlacedCycled(int N, int niters) -{ - // Allocate the arrays as a group. Blitz++ will interlace them in - // memory, improving data locality. - - Array P1, P2, P3, c; - allocateArrays(shape(N,N,N), P1, P2, P3, c); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - cycleArrays(P1, P2, P3); - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db3.cpp b/benchmarks/acou3db3.cpp deleted file mode 100644 index b7da30b0..00000000 --- a/benchmarks/acou3db3.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -float acoustic3D_BlitzCycled(int N, int niters) -{ - Array P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N); - Range I(1,N-2), J(1,N-2), K(1,N-2); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); - - cycleArrays(P1, P2, P3); 
- } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3db4.cpp b/benchmarks/acou3db4.cpp deleted file mode 100644 index dcbd46ae..00000000 --- a/benchmarks/acou3db4.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -using namespace blitz; - -void setupInitialConditions(Array& P1, Array& P2, - Array& P3, Array& c, int N); - -BZ_DECLARE_STENCIL4(acoustic3D, P1, P2, P3, c) - P3 = 2 * P2 + c * Laplacian3D_stencilop(P2) - P1; -BZ_END_STENCIL - -float acoustic3D_BlitzStencil(int N, int niters) -{ - Array P1, P2, P3, c; - allocateArrays(shape(N,N,N), P1, P2, P3, c); - - setupInitialConditions(P1, P2, P3, c, N); - - for (int iter=0; iter < niters; ++iter) - { - applyStencil(acoustic3D(), P1, P2, P3, c); - cycleArrays(P1, P2, P3); - } - - return P1(N/2,N/2,N/2); -} - diff --git a/benchmarks/acou3df.f b/benchmarks/acou3df.f deleted file mode 100644 index f586ef91..00000000 --- a/benchmarks/acou3df.f +++ /dev/null @@ -1,134 +0,0 @@ -! INTEGER N, iters -! REAL check - -! N = 112 -! iters = 210 -! CALL acoustic3d_f77(N,iters,check) -! PRINT *, check -! END - - SUBROUTINE acoustic3d_f77(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - INTEGER i, j, k - - CALL acoust3d_f77_setup(P1, P2, P3, C, N) - - DO iter=1, niters - DO k=2,N-1 - DO j=2,N-1 - DO i=2,N-1 - P3(i,j,k) = (2-6*C(i,j,k))*P2(i,j,k) + C(i,j,k) - . * (P2(i,j-1,k) + P2(i,j+1,k) + P2(i-1,j,k) - . + P2(i+1,j,k) + P2(i,j,k-1) + P2(i,j,k+1)) - . - P1(i,j,k) - END DO - END DO - END DO - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = P2(i,j,k) - P2(i,j,k) = P3(i,j,k) - END DO - END DO - END DO - END DO - - check = P1(N/2,N/2,N/2) - - RETURN - END - - - - SUBROUTINE acoust3d_f77_setup(P1, P2, P3, c, N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N) - INTEGER i, j, k - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, - . cavityTop, cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! 
Set up velocity field - - DO k=1,N - DO j=1,N - DO i=1,N/2 - c(i,j,k) = 0.05 - END DO - DO i=N/2+1,N - c(i,j,k) = 0.3 - END DO - END DO - END DO - -! Cavities - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - DO k=cavityFront, cavityBack - DO j=cavityLeft, cavityRight - DO i=cavityTop, cavityBottom - c(i,j,k) = 0.02 - END DO - DO i=cavityTop2, cavityBottom2 - c(i,j,k) = 0.001 - END DO - END DO - END DO - -! Initial pressure distribution - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = 0.0 - P2(i,j,k) = exp(- ((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - P3(i,j,k) = 0.0 - END DO - END DO - END DO - - CALL acoust3d_f77_setup_check(P2, N) - CALL acoust3d_f77_setup_check(c, N) - - RETURN - END - - - - SUBROUTINE acoust3d_f77_setup_check(A, N) - INTEGER N - REAL A(N,N,N) - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + A(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN - END - diff --git a/benchmarks/acou3df2.f b/benchmarks/acou3df2.f deleted file mode 100644 index 935c169f..00000000 --- a/benchmarks/acou3df2.f +++ /dev/null @@ -1,160 +0,0 @@ -! -! Tuned Fortran 77 version -! Optimizations: -! - Rather than four separate arrays, one 4D array is allocated. This -! allows P1, P2, P3, and C to be interlaced in memory, improving data -! locality -! - The stencil is tiled to improve cache usage -! - Instead of copying, the indices into the 4D array are shuffled. - -! INTEGER N, iters -! REAL check - -! N = 112 -! iters = 210 -! CALL acoustic3d_f77Tuned(N,iters,check) -! PRINT *, check -! 
END - - SUBROUTINE acoustic3d_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - - CALL acoust3d_f77Tuned_setup(P1,P2,P3,C,N) - - DO iter = 1, niters, 3 - CALL acoustic3d_f77Tuned_stencil(P1,P2,P3,C,N) - CALL acoustic3d_f77Tuned_stencil(P2,P3,P1,C,N) - CALL acoustic3d_f77Tuned_stencil(P3,P1,P2,C,N) - END DO - - check = P1(N/2,N/2,N/2) - - RETURN - END - - - - SUBROUTINE acoustic3d_f77Tuned_stencil(P1,P2,P3,C,N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), C(N,N,N) - INTEGER i,j,k - INTEGER bi,bj,bk,ni,nj,nk,blockSize - - blockSize = 8 - - DO bk=2,N-1,blockSize - nk = min(bk+blockSize-1,N-1) - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO k=bk,nk - DO j=bj,nj - DO i=bi,ni - P3(i,j,k) = (2-6*C(i,j,k))*P2(i,j,k) - . + C(i,j,k) * (P2(i,j-1,k) + P2(i,j+1,k) - . + P2(i-1,j,k) + P2(i+1,j,k) + P2(i,j,k-1) - . + P2(i,j,k+1)) - P1(i,j,k) - END DO - END DO - END DO - END DO - END DO - END DO - - RETURN - END - - - - - SUBROUTINE acoust3d_f77Tuned_setup(P1, P2, P3, c, N) - INTEGER N - REAL P1(N,N,N), P2(N,N,N), P3(N,N,N), c(N,N,N) - INTEGER i, j, k - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, - . cavityTop, cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! Set up velocity field - - DO k=1,N - DO j=1,N - DO i=1,N/2 - c(i,j,k) = 0.05 - END DO - DO i=N/2+1,N - c(i,j,k) = 0.3 - END DO - END DO - END DO - -! Cavities - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - DO k=cavityFront, cavityBack - DO j=cavityLeft, cavityRight - DO i=cavityTop, cavityBottom - c(i,j,k) = 0.02 - END DO - DO i=cavityTop2, cavityBottom2 - c(i,j,k) = 0.001 - END DO - END DO - END DO - -! 
Initial pressure distribution - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P1(i,j,k) = 0.0 - P2(i,j,k) = exp(- ((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - P3(i,j,k) = 0.0 - END DO - END DO - END DO - - CALL acoust3d_f77Tuned_setup_check(P2, N) - CALL acoust3d_f77Tuned_setup_check(c, N) - - RETURN - END - - - - SUBROUTINE acoust3d_f77Tuned_setup_check(P, N) - INTEGER N - REAL P(N,N,N) - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + P(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN - END - diff --git a/benchmarks/acou3df90.f90 b/benchmarks/acou3df90.f90 deleted file mode 100644 index 47bcba76..00000000 --- a/benchmarks/acou3df90.f90 +++ /dev/null @@ -1,95 +0,0 @@ - -SUBROUTINE acoustic3d_f90(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - REAL, DIMENSION (N,N,N) :: P1, P2, P3, c - INTEGER iter - - CALL acoustic3d_f90_setup(P1, P2, P3, c, N) - - DO iter=1, niters - P3(2:N-1,2:N-1,2:N-1) = (2-6*c(2:N-1,2:N-1,2:N-1)) & - * P2(2:N-1,2:N-1,2:N-1) & - + c(2:N-1,2:N-1,2:N-1)*(P2(1:N-2,2:N-1,2:N-1) + P2(3:N,2:N-1,2:N-1) & - + P2(2:N-1,1:N-2,2:N-1)+P2(2:N-1,3:N,2:N-1) & - + P2(2:N-1,2:N-1,1:N-2)+P2(2:N-1,2:N-1,3:N)) - P1(2:N-1,2:N-1,2:N-1) - P1 = P2 - P2 = P3 - END DO - - check = P1(N/2,N/2,N/2) - - RETURN -END - -SUBROUTINE acoustic3d_f90_setup(P1, P2, P3, c, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( INOUT ) :: P1, P2, P3, c - - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, cavityTop, & - cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! 
Set the velocity field - - c(1:N/2,:,:) = 0.05 - c(N/2+1:N,:,:) = 0.3; - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - c(cavityTop:cavityBottom,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.02; - c(cavityTop2:cavityBottom2,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.001; - -! Initial pressure distribution - P1 = 0.0 - P3 = 0.0 - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P2(i,j,k) = exp(-((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - END DO - END DO - END DO - - CALL acoustic3d_f90_setup_check(P2, N) - CALL acoustic3d_f90_setup_check(c, N) - - RETURN -END - - -SUBROUTINE acoustic3d_f90_setup_check(A, N) - INTEGER, INTENT( IN ) :: N - REAL, INTENT( IN ), DIMENSION(N,N,N) :: A - - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + A(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN -END - diff --git a/benchmarks/acou3df902.f90 b/benchmarks/acou3df902.f90 deleted file mode 100644 index 84f30295..00000000 --- a/benchmarks/acou3df902.f90 +++ /dev/null @@ -1,119 +0,0 @@ -! -! F90 Tuned version -! Optimizations: -! - One 4D array allocated, other arrays are slices of this array. This -! arrangement interlaces the arrays in memory, improving data locality -! - Rather than copying arrays, the indices into the 4D array are copied -! - Rely on compiler for tiling. 
- -SUBROUTINE acoustic3d_f90Tuned(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - REAL, DIMENSION (N,N,N) :: P1, P2, P3, C - INTEGER iter - - CALL acoustic3d_f90Tuned_setup(P1, P2, P3, c, N) - - IF (MODULO(niters,3) > 0) THEN - PRINT *, 'Warning: In acoustic3d_f90Tuned: niters mod 3 != 0' - PRINT *, 'Will do fewer iterations, benchmark result will be off' - ENDIF - - DO iter=1, niters, 3 - CALL acoustic3d_f90Tuned_stencil(P1, P2, P3, C, N) - CALL acoustic3d_f90Tuned_stencil(P2, P3, P1, C, N) - CALL acoustic3d_f90Tuned_stencil(P3, P1, P2, C, N) - END DO - - check = P1(N/2,N/2,N/2) - - RETURN -END - -SUBROUTINE acoustic3d_f90Tuned_stencil(P1, P2, P3, C, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( IN ) :: P1, P2, C - REAL, DIMENSION (N,N,N), INTENT( OUT ) :: P3 - - P3(2:N-1,2:N-1,2:N-1) = (2-6*C(2:N-1,2:N-1,2:N-1)) & - * P2(2:N-1,2:N-1,2:N-1) + C(2:N-1,2:N-1,2:N-1) & - * (P2(1:N-2,2:N-1,2:N-1) + P2(3:N,2:N-1,2:N-1) & - + P2(2:N-1,1:N-2,2:N-1) + P2(2:N-1,3:N,2:N-1) & - + P2(2:N-1,2:N-1,1:N-2)+P2(2:N-1,2:N-1,3:N)) & - - P1(2:N-1,2:N-1,2:N-1) - RETURN -END - - -SUBROUTINE acoustic3d_f90Tuned_setup(P1, P2, P3, c, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N,N), INTENT( INOUT ) :: P1, P2, P3, c - - INTEGER cavityLeft, cavityRight, cavityFront, cavityBack, cavityTop, & - cavityBottom, cavityTop2, cavityBottom2 - REAL ci, cj, ck, s2 - -! Set the velocity field - - c(1:N/2,:,:) = 0.05 - c(N/2+1:N,:,:) = 0.3; - - cavityLeft = 3 * N / 7.0 - cavityRight = 4 * N / 7.0 - cavityFront = 3 * N / 7.0 - cavityBack = 4 * N / 7.0 - cavityTop = 5 * N / 7.0 - cavityBottom = 6 * N / 7.0 - cavityTop2 = 1 * N / 7.0 - cavityBottom2 = 2 * N / 7.0 - - c(cavityTop:cavityBottom,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.02; - c(cavityTop2:cavityBottom2,cavityLeft:cavityRight, & - cavityFront:cavityBack) = 0.001; - -! 
Initial pressure distribution - P1 = 0.0 - P3 = 0.0 - - ci = N/2.0 - cj = N/2.0 - ck = N/2.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO k=1,N - DO j=1,N - DO i=1,N - P2(i,j,k) = exp(-((i-ci)**2+(j-cj)**2+(k-ck)**2) * s2) - END DO - END DO - END DO - - CALL acoustic3d_f90Tuned_setup_check(P2, N) - CALL acoustic3d_f90Tuned_setup_check(c, N) - - RETURN -END - - -SUBROUTINE acoustic3d_f90Tuned_setup_check(P, N) - INTEGER, INTENT( IN ) :: N - REAL, INTENT( IN ), DIMENSION(N,N,N) :: P - - INTEGER i, j, k - REAL check - - check = 0.0 - DO k=1,N - DO j=1,N - DO i=1,N - check = check + P(i,j,k) * (i+N*j+N*N*k) - END DO - END DO - END DO - - PRINT *, 'Array check: ', check - - RETURN -END - diff --git a/benchmarks/acoustic.cpp b/benchmarks/acoustic.cpp deleted file mode 100644 index a3a245d5..00000000 --- a/benchmarks/acoustic.cpp +++ /dev/null @@ -1,369 +0,0 @@ -//#define BZ_DISABLE_RESTRICT -#define BZ_ARRAY_2D_NEW_STENCIL_TILING - -#include -#include -#include -#include - -#ifdef BZ_HAVE_STD - #include -#else - #include -#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define echo_f90 echo_f90_ - #define echo_f77 echo_f77_ - #define echo_f90_tuned echo_f90_tuned_ - #define echo_f77tuned echo_f77tuned_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define echo_f90 echo_f90__ - #define echo_f77 echo_f77__ - #define echo_f90_tuned echo_f90_tuned__ - #define echo_f77tuned echo_f77tuned__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define echo_f90 ECHO_F90 - #define echo_f77 ECHO_F77 - #define echo_f90_tuned ECHO_F90_TUNED - #define echo_f77tuned ECHO_F77TUNED -#endif - -extern "C" { -void echo_f90(int& N, int& niters, float& check); -void echo_f77(int& N, int& niters, float& check); -void echo_f90_tuned(int& N, int& niters, float& check); -void echo_f77tuned(int& N, int& niters, float& check); -} - -void f77(BenchmarkExt&); -void f90(BenchmarkExt&); -void f77_tuned(BenchmarkExt&); -void 
f90_tuned(BenchmarkExt&); - -void echo_BlitzInterlacedCycled(BenchmarkExt&); -void echo_BlitzCycled(BenchmarkExt&); -void echo_BlitzRaw(BenchmarkExt&); -void echo_BlitzStencil(BenchmarkExt&); - -int main() -{ - Timer timer; - float check; - int numBenchmarks = 6; -#ifdef FORTRAN_90 - numBenchmarks+=2; -#endif - - BenchmarkExt bench("Acoustic 2D Benchmark", numBenchmarks); - const int numSizes=7; - bench.setNumParameters(numSizes); - Vector parameters(numSizes); - parameters=10*pow(2.0,tensor::i); - Vector flops(numSizes); - flops=(parameters-2)*(parameters-2) * 9.0; - Vector iters(numSizes); - // iters must be divisible by 3 for tuned fortran versions - iters=cast(100000000/flops)*3; - - bench.setParameterVector(parameters); - bench.setParameterDescription("Matrix size"); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - echo_BlitzRaw(bench); - echo_BlitzStencil(bench); - -#if 0 - echo_BlitzInterlaced(bench, c); -#endif - - echo_BlitzCycled(bench); - echo_BlitzInterlacedCycled(bench); - -#ifdef FORTRAN_90 - f90(bench); - f90_tuned(bench); -#endif - - f77(bench); - f77_tuned(bench); - - bench.endBenchmarking(); - bench.saveMatlabGraph("acoustic.m"); - - return 0; -} - -void checkArray(Array& A, int N) -{ - float check = 0.0; - for (int i=0; i < N; ++i) - for (int j=0; j < N; ++j) - check += ((i+1)*N + j + 1) * A(i,j); - - cout << "Array check: " << check << endl; -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N); - - -void echo_BlitzRaw(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (raw)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - - Array P1(N,N), P2(N,N), P3(N,N), c(N,N); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - 
P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - P1 = P2; - P2 = P3; - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); - - -#if 0 -ofstream ofs("testecho.m"); -ofs << "A = ["; -for (int i=0; i < N; ++i) -{ - for (int j=0; j < N; ++j) - { - ofs << int(8192*P2(i,j)+1024*c(i,j)) << " "; - } - if (i < N-1) - ofs << ";" << endl; -} -ofs << "];" << endl; -#endif - -} - -void echo_BlitzCycled(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (cycled)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1(N,N), P2(N,N), P3(N,N), c(N,N); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - -void echo_BlitzInterlacedCycled(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (interlaced & cycled)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - Range I(1,N-2), J(1,N-2); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - 
-BZ_DECLARE_STENCIL4(acoustic2D,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian2D_stencilop(P2) - P1; -BZ_STENCIL_END - -void echo_BlitzStencil(BenchmarkExt&bench) -{ - bench.beginImplementation("Blitz++ (stencil)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - - setInitialConditions(c, P1, P2, P3, N); - checkArray(P2, N); - checkArray(c, N); - - bench.start(); - for (int iter=0; iter < niters; ++iter) - { - applyStencil(acoustic2D(), P1, P2, P3, c); - cycleArrays(P1,P2,P3); - } - bench.stop(); - - cout << P1(N/2-1,(7*N)/8-1) << endl; - } - - bench.endImplementation(); -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N) -{ - // Set the velocity field - c = 0.2; - - // Solid block with which the pulse collides - int blockLeft = 0; - int blockRight = int(2*N/5.0-1); - int blockTop = int(N/3-1); - int blockBottom = int(2*N/3.0-1); - c(Range(blockTop,blockBottom),Range(blockLeft,blockRight)) = 0.5; - - // Channel directing the pulse leftwards - int channelLeft = int(4*N/5.0-1); - int channelRight = N-1; - int channel1Height = int(3*N/8.0-1); - int channel2Height = int(5*N/8.0-1); - c(channel1Height,Range(channelLeft,channelRight)) = 0.0; - c(channel2Height,Range(channelLeft,channelRight)) = 0.0; - - // Initial pressure distribution: gaussian pulse inside the channel - using namespace blitz::tensor; - int cr = int(N/2-1); - int cc = int(7.0*N/8.0-1); - // pow2 is not defined for pod types. 
- float s2 = 64.0 * 9.0 / pow(N/2.0,2); - cout << "cr = " << cr << " cc = " << cc << " s2 = " << s2 << endl; - P1 = 0.0; - P2 = exp(-(pow2(i-cr)+pow2(j-cc)) * s2); - P3 = 0.0; -} - - -void f77(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran77"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f77(N, niters, check); - bench.stop(); - cout << check << endl; - } - bench.endImplementation(); -}; - -void f77_tuned(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran77 (tuned)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f77tuned(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; - -void f90(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran90"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f90(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; -void f90_tuned(BenchmarkExt&bench) -{ - bench.beginImplementation("Fortran90 (tuned)"); - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - int niters = bench.getIterations(); - cout << bench.currentImplementation() << " N=" << N << endl; - float check; - bench.start(); - echo_f90_tuned(N, niters, check); - bench.stop(); - cout << check << endl; - } - - bench.endImplementation(); -}; diff --git a/benchmarks/acousticf.f b/benchmarks/acousticf.f deleted file mode 100644 index 868f582b..00000000 --- a/benchmarks/acousticf.f +++ /dev/null @@ -1,121 +0,0 @@ 
- -! INTEGER N, iters -! REAL check - -! N = 128 -! iters = N*3 -! CALL echo_f77(N,iters,check) -! PRINT *, check -! END - - - - SUBROUTINE echo_f77(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N), P2(N,N), P3(N,N), C(N,N) - INTEGER i, j - - CALL echo_f77_set(c, P1, P2, P3, N) - CALL checkArray(P2, N) - CALL checkArray(c, N) - - DO iter=1, niters - DO j=2,N-1 - DO i=2,N-1 - P3(i,j) = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . + P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - END DO - END DO - - DO j=1,N - DO i=1,N - P1(i,j) = P2(i,j) - P2(i,j) = P3(i,j) - END DO - END DO - END DO - - check = P1(N/2,7*N/8) - - RETURN - END - - - SUBROUTINE echo_f77_set(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - c(i,j) = 0.2 - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - c(i,j) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - c(channel1Height,j) = 0.0 - c(channel2Height,j) = 0.0 - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - print *, 'cr = ', cr, ' cc = ', cc, ' s2 = ', s2 - - DO j=1,N - DO i=1,N - P1(i,j) = 0.0 - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - P3(i,j) = 0.0 - END DO - END DO - - RETURN - END - - SUBROUTINE checkArray(A, N) - INTEGER N - REAL A(N,N) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/acousticf2.f b/benchmarks/acousticf2.f deleted file mode 100644 index e117bffc..00000000 --- a/benchmarks/acousticf2.f +++ /dev/null @@ -1,163 +0,0 @@ -! INTEGER N, iters -! REAL check - -! N = 128 -! iters = N*3 -! CALL echo_f77Tuned(N,iters,check) -! PRINT *, check -! END - - - - SUBROUTINE echo_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL P1(N,N), P2(N,N), P3(N,N), C(N,N) - INTEGER i, j - INTEGER nitersd3, remainder - - CALL echo_f77_set2(c, P1, P2, P3, N) - CALL checkArray2(P2, N) - CALL checkArray2(c, N) - - nitersd3 = niters / 3 - remainder = niters - 3 * nitersd3 - IF (remainder .NE. 0) THEN - PRINT *, 'niters should be divisible by 3, results will be off' - ENDIF - - DO iter=1, niters, 3 - CALL stencil5(c, P1, P2, P3, N) - CALL stencil5(c, P2, P3, P1, N) - CALL stencil5(c, P3, P1, P2, N) - END DO - - check = P1(N/2,7*N/8) - - RETURN - END - - - - SUBROUTINE echo_f77_set2(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - c(i,j) = 0.2 - END DO - END DO - -! 
Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - c(i,j) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - c(channel1Height,j) = 0.0 - c(channel2Height,j) = 0.0 - END DO - -! Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - print *, 'cr = ', cr, ' cc = ', cc, ' s2 = ', s2 - - DO j=1,N - DO i=1,N - P1(i,j) = 0.0 - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - P3(i,j) = 0.0 - END DO - END DO - - RETURN - END - - - SUBROUTINE stencil5(c, P1, P2, P3, N) - INTEGER N - REAL c(N,N), P1(N,N), P2(N,N), P3(N,N) - REAL tmp1, tmp2, tmp3 - INTEGER TileWidth, TileHeight, bj, nj, bi, ni, i - - TileWidth = 16 - TileHeight = 3 - - DO bj=2, N-1, TileWidth - nj = MIN(bj+TileWidth-1, N-1) - - DO bi=2, N-1, TileHeight - IF (bi+TileHeight .LT. N) THEN - i = bi - DO j=bj,nj - tmp1 = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . + P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - tmp2 = (2-4*c(i+1,j))*P2(i+1,j) + c(i+1,j) - . *(P2(i+1,j-1) + P2(i+1,j+1) + P2(i,j) + P2(i+2,j)) - . - P1(i+1,j) - tmp3 = (2-4*c(i+2,j))*P2(i+2,j) + c(i+2,j) - . *(P2(i+2,j-1) + P2(i+2,j+1) + P2(i+1,j) + P2(i+3,j)) - . - P1(i+2,j) - P3(i,j) = tmp1 - P3(i+1,j) = tmp2 - P3(i+2,j) = tmp3 - END DO - ELSE - DO i=bi, N-1 - DO j=bj,nj - P3(i,j) = (2-4*c(i,j))*P2(i,j) + c(i,j)*(P2(i,j-1) - . 
+ P2(i,j+1) + P2(i-1,j) + P2(i+1,j)) - P1(i,j) - END DO - END DO - END IF - END DO - END DO - - RETURN - END - - - SUBROUTINE checkArray2(A, N) - INTEGER N - REAL A(N,N) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END - diff --git a/benchmarks/acousticf90.f90 b/benchmarks/acousticf90.f90 deleted file mode 100644 index 7c47069a..00000000 --- a/benchmarks/acousticf90.f90 +++ /dev/null @@ -1,101 +0,0 @@ -!INTEGER N, niters -!REAL check -!N = 128 -!niters = 128*3 -!CALL echo_f90(N, niters, check) -!PRINT *, check -!END - -SUBROUTINE echo_f90(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - - REAL, DIMENSION (N,N) :: P1, P2, P3, c - INTEGER iter - - CALL echo_f90_setupInitialConditions(c, P1, P2, P3, N) - CALL checkArray_f90(P2, N) - CALL checkArray_f90(c, N) - - DO iter=1, niters - P3(2:N-1,2:N-1) = (2-4*c(2:N-1,2:N-1)) * P2(2:N-1,2:N-1) & - + c(2:N-1,2:N-1)*(P2(1:N-2,2:N-1) + P2(3:N,2:N-1) & - + P2(2:N-1,1:N-2) + P2(2:N-1,3:N)) - P1(2:N-1,2:N-1) - P1 = P2 - P2 = P3 - END DO - - check = P1(N/2,7*N/8) - - RETURN -END - - - - - -SUBROUTINE echo_f90_setupInitialConditions(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N) :: P1(N,N), P2(N,N), P3(N,N), c(N,N) - - INTEGER blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - REAL cr, cc - INTEGER i, j - REAL s2 - - ! Set the velocity field - c = 0.2 - - ! Solid block with which the pulse collides - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - c(blockTop:blockBottom, blockLeft:blockRight) = 0.5 - - ! 
Channel directing the pulse leftwards - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - c(channel1Height,channelLeft:channelRight) = 0.0; - c(channel2Height,channelLeft:channelRight) = 0.0; - - ! Initial pressure distribution: a gaussian pulse inside the channel - cr = N / 2.0 - cc = 7.0 * N / 8.0 - s2 = 64.0 * 9.0 / ((N / 2.0) ** 2) - - DO j=1,N - DO i=1,N - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - END DO - END DO - - P1 = 0.0 - P3 = 0.0 -END - - - - - - SUBROUTINE checkArray_f90(A, N) - INTEGER N - REAL, DIMENSION(N,N) :: A - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END - diff --git a/benchmarks/acousticf902.f90 b/benchmarks/acousticf902.f90 deleted file mode 100644 index db2ee345..00000000 --- a/benchmarks/acousticf902.f90 +++ /dev/null @@ -1,116 +0,0 @@ -!INTEGER N, niters -!REAL check -!N = 128 -!niters = 128*3 -!CALL echo_f90(N, niters, check) -!PRINT *, check -!END - -SUBROUTINE echo_f90_tuned(N, niters, check) - INTEGER, INTENT( IN ) :: N, niters - REAL, INTENT( OUT ) :: check - - REAL, DIMENSION (N,N) :: P1, P2, P3, c - INTEGER iter - - CALL echo_f90_tuned_setup(c, P1, P2, P3, N) - CALL checkArray_f90_tuned(P2, N) - CALL checkArray_f90_tuned(c, N) - - IF (MODULO(niters, 3) > 0) THEN - PRINT *, 'niters should be divisible by 3, results will be off' - ENDIF - - DO iter=1, niters, 3 - CALL stencil_f90(c, P1, P2, P3, N) - CALL stencil_f90(c, P2, P3, P1, N) - CALL stencil_f90(c, P3, P1, P2, N) - END DO - - check = P1(N/2,7*N/8) - - RETURN -END - - - -SUBROUTINE stencil_f90(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION(N,N), INTENT( INOUT ) :: c, P1, P2, P3 - - P3(2:N-1,2:N-1) = (2-4*c(2:N-1,2:N-1)) * P2(2:N-1,2:N-1) & - + c(2:N-1,2:N-1)*(P2(1:N-2,2:N-1) + P2(3:N,2:N-1) & - + P2(2:N-1,1:N-2) + P2(2:N-1,3:N)) - P1(2:N-1,2:N-1) - - RETURN -END - - - - - 
-SUBROUTINE echo_f90_tuned_setup(c, P1, P2, P3, N) - INTEGER, INTENT( IN ) :: N - REAL, DIMENSION (N,N) :: P1(N,N), P2(N,N), P3(N,N), c(N,N) - - INTEGER blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - REAL cr, cc - INTEGER i, j - REAL s2 - - ! Set the velocity field - c = 0.2 - - ! Solid block with which the pulse collides - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - c(blockTop:blockBottom, blockLeft:blockRight) = 0.5 - - ! Channel directing the pulse leftwards - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - c(channel1Height,channelLeft:channelRight) = 0.0; - c(channel2Height,channelLeft:channelRight) = 0.0; - - ! Initial pressure distribution: a gaussian pulse inside the channel - cr = N / 2.0 - cc = 7.0 * N / 8.0 - s2 = 64.0 * 9.0 / ((N / 2.0) ** 2) - - DO j=1,N - DO i=1,N - P2(i,j) = exp(-((i-cr)**2 + (j-cc)**2) * s2) - END DO - END DO - - P1 = 0.0 - P3 = 0.0 -END - - - - - -SUBROUTINE checkArray_f90_tuned(A, N) -INTEGER N -REAL, DIMENSION(N,N) :: A - -INTEGER i,j -REAL check -check = 0.0 -DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j) - END DO -END DO - -PRINT *, 'Array check: ', check -RETURN -END - diff --git a/benchmarks/arrdaxpy.cpp b/benchmarks/arrdaxpy.cpp deleted file mode 100644 index 0714a2a5..00000000 --- a/benchmarks/arrdaxpy.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// Array DAXPY benchmark - -#include -#include -#include -#include - -namespace blitz { -extern void sink(); -} - -using namespace blitz; - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES - #define arrdaxpyf arrdaxpyf_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define arrdaxpyf arrdaxpyf__ -#endif - -extern "C" { - void arrdaxpyf(double* A, double* B, int& N, double& a); -} - -void arrdaxpyFortran77Version(BenchmarkExt& bench); -void arrdaxpyBlitzVersion(BenchmarkExt& bench); 
- -int main() -{ - BenchmarkExt bench("Array DAXPY", 2); - - const int numSizes = 8; - - bench.setNumParameters(numSizes); - bench.setDependentVariable("flops"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - parameters = pow(2.,tensor::i); - cout << parameters; - iters = 100*16*32*8*8*8/pow3(parameters); - cout << iters; - flops = pow3(parameters) * 2 * 2; - cout << flops; - - bench.setParameterVector(parameters); - bench.setParameterDescription("3D Array size"); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - arrdaxpyBlitzVersion(bench); - arrdaxpyFortran77Version(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("arrdaxpy.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[i] = rnd.random(); -} - -void arrdaxpyBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N); - initializeRandomDouble(B.data(), N*N*N); - TinyVector size = N-2; - double a = 0.34928313; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - A += a * B; - A += b * B; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -void arrdaxpyFortran77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new 
double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - double a = 0.34928313; - - for (long i=0; i < iters; ++i) - { - arrdaxpyf(A,B,N,a); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} diff --git a/benchmarks/arrdaxpyf.f b/benchmarks/arrdaxpyf.f deleted file mode 100644 index 3df64ba0..00000000 --- a/benchmarks/arrdaxpyf.f +++ /dev/null @@ -1,26 +0,0 @@ - subroutine arrdaxpyf(A, B, N, c1) - integer N, iters - double precision A(N,N,N), B(N,N,N) - double precision c1 - double precision c2 - c2 = - c1 - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = A(i,j,k) + c1 * B(i,j,k); - enddo - enddo - enddo - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = A(i,j,k) + c2 * B(i,j,k); - enddo - enddo - enddo - - return - end - diff --git a/benchmarks/arrexpr1.cpp b/benchmarks/arrexpr1.cpp deleted file mode 100644 index 8010ac16..00000000 --- a/benchmarks/arrexpr1.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Array expression benchmark - -#include -#include - -using namespace blitz; - -void blitzVersion(BenchmarkExt& bench); -void CVersion(BenchmarkExt& bench); - -int main() -{ - BenchmarkExt bench("Array expression", 2); - - bench.beginBenchmarking(); - blitzVersion(bench); - CVersion(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("arrexpr1.m"); - - return 0; -} - -void blitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - - long iters = bench.getIterations(); - - Array x(N); - - // Tickle - x = 0.; - firstIndex i; - - bench.start(); - for (long it=0; it < iters; ++it) - { - x = i * i; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void CVersion(BenchmarkExt& 
bench) -{ - bench.beginImplementation("C"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "C: N = " << N << endl; - - long iters = bench.getIterations(); - - double* x = new double[N]; - - // Tickle - for (int i=0; i < N; ++i) - x[i] = 0; - - bench.start(); - for (long it=0; it < iters; ++it) - { - for (int i=0; i < N; ++i) - x[i] = i * i; - } - bench.stop(); - - delete [] x; - } - - bench.endImplementation(); -} - diff --git a/benchmarks/arrexpr1.m b/benchmarks/arrexpr1.m deleted file mode 100644 index a63dee2a..00000000 --- a/benchmarks/arrexpr1.m +++ /dev/null @@ -1,28 +0,0 @@ -% This matlab file generated automatically by class Benchmark -% of the Blitz++ class library. - -parm = [ 1.000000000000e+00 3.000000000000e+00 5.000000000000e+00 1.000000000000e+01 1.700000000000e+01 3.100000000000e+01 5.600000000000e+01 1.000000000000e+02 1.770000000000e+02 3.160000000000e+02 5.620000000000e+02 1.000000000000e+03 1.778000000000e+03 3.162000000000e+03 5.623000000000e+03 1.000000000000e+04 1.778200000000e+04 3.162200000000e+04 5.623400000000e+04 ]; - -Mf = [ 2.500000000000e+01 5.555555555556e+00 ; -8.333300000000e+00 6.249975000000e+00 ; -8.333333333333e+00 6.250000000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.249837500000e+00 6.249837500000e+00 ; -6.249987500000e+00 5.555544444444e+00 ; -6.249600000000e+00 6.249600000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.248100000000e+00 5.553866666667e+00 ; -6.248900000000e+00 6.248900000000e+00 ; -5.551311111111e+00 6.245225000000e+00 ; -6.250000000000e+00 6.250000000000e+00 ; -6.245225000000e+00 5.551311111111e+00 ; -5.551066666667e+00 5.551066666667e+00 ; -5.498044444444e+00 5.498044444444e+00 ; -5.555555555556e+00 5.000000000000e+00 ; -4.978960000000e+00 4.978960000000e+00 ; -4.743300000000e+00 5.929125000000e+00 ; -4.998577777778e+00 4.498720000000e+00 ] ; - -semilogx(parm,Mf), title('Array expression'), - xlabel('Vector length'), ylabel('Mflops/s') 
-legend('Blitz++', 'C') diff --git a/benchmarks/cfd.cpp b/benchmarks/cfd.cpp deleted file mode 100644 index 43c5176c..00000000 --- a/benchmarks/cfd.cpp +++ /dev/null @@ -1,129 +0,0 @@ -#include -#include - -using namespace blitz; - -/* - * The current implementation of stencil objects forces these variables - * to be placed in global scope. Ugh. This restriction will be removed - * eventually. - */ -double rho; // Density of fluid -double recip_rho; // 1/rho -double eta; // Kinematic viscosity -double time_now; // Elapsed seconds -double delta_t; // Time step -double volume; // Volume of a cell -double airPressure; // Air pressure (Pa) -double spatialStep; // Grid element size -double gravity; // Acceleration due to gravity -double gravityPressureGradient; // Pressure gradient due to gravity -/* - * The "geometry" object specifies how an array is mapped into real-world - * space. In this case, "UniformCubicGeometry" is used, which means that - * the real-world grid is orthogonal, regularly spaced, with the same spatial - * step in each dimension. - */ - -UniformCubicGeometry<3> geom; // Geometry -/* - * Some typedefs to make life easier. - */ - -typedef TinyVector vector3d; -typedef Array vectorField; -typedef Array scalarField; - -/*********** Timestep the velocity field ************ - * This is a 63-point stencil. For example, Laplacian3DVec4 turns into - * a 45-point stencil: each 2nd derivative is a 5-point stencil, and - * there are 9 of these derivatives to take the Laplacian of a 3D vector - * field. 
- */ - -BZ_DECLARE_STENCIL5(timestep, V, nextV, P, advect, force) - - nextV = *V + delta_t * ( recip_rho * ( - eta * Laplacian3DVec4(V,geom) - grad3D4(P, geom) + *force) - *advect); - -BZ_END_STENCIL -/* - * Allocate arrays and set their initial state - */ -void setup(const int N, vectorField& V, vectorField& nextV, scalarField& P, - scalarField& P_rhs, vectorField& advect, vectorField& force) -{ - // A 1m x 1m x 1m domain - spatialStep = 1.0 / (N - 1); - geom = UniformCubicGeometry<3>(spatialStep); - - // Allocate arrays - allocateArrays(shape(N,N,N), advect, V, nextV, force); // vector fields - allocateArrays(shape(N,N,N), P, P_rhs); // scalar fields - - // Since incompressibility is assumed, pressure only shows up as - // derivative terms in the equations. We choose airPressure = 0 - // as an arbitrary datum. - - airPressure = 0; // Pa - rho = 1000; // density of fluid, kg/m^3 - recip_rho = 1.0 / rho; // inverse of density - eta = 1.0e-6; // kinematic viscosity of fluid, m^2/s - gravity = 9.81; // m/s^2 - delta_t = 0.001; // initial time step, in seconds - volume = pow3(spatialStep); // cubic volume associated with grid point - - // Kludge: Set eta high, so that the flow will spread faster. - // This means the cube is filled with molasses, rather than water. 
- eta *= 1000; - - // Initial conditions: quiescent - V = 0.0; - P_rhs = 0.0; - advect = 0.0; - nextV = 0.0; - P = 0.0; - force = 0.0; -} - -// Calculate a simple check on a vector field -void record(vectorField& V) -{ - // Calculate the magnitude of a field - const int x=0, y=1, z=2; - double magx = sum(pow2(V[x])) / V.numElements(); - double magy = sum(pow2(V[y])) / V.numElements(); - double magz = sum(pow2(V[z])) / V.numElements(); - - cout << "norm = [" << magx - << " " << magy << " " << magz << " ]" << endl; -} - -void iterate(vectorField& V, vectorField& nextV, scalarField& P, - scalarField& P_rhs, vectorField& advect, vectorField& force) -{ - // Time step - applyStencil(timestep(), V, nextV, P, advect, force); -} - -int main() -{ - vectorField V, nextV; // Velocity fields - scalarField P, P_rhs; // Pressure fields - vectorField advect; // Advection field - vectorField force; // Forcing function - - const int N = 50; // Arrays are NxNxN - - setup(N, V, nextV, P, P_rhs, advect, force); - - const int nIters = 10; - - for (int i=0; i < nIters; ++i) - { - iterate(V, nextV, P, P_rhs, advect, force); - } - - return 0; -} - diff --git a/benchmarks/cfdf.f b/benchmarks/cfdf.f deleted file mode 100644 index d713aab6..00000000 --- a/benchmarks/cfdf.f +++ /dev/null @@ -1,103 +0,0 @@ - - PROGRAM CFDF - - PARAMETER ( N = 50, niters = 10 ) - - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - INTEGER iter - -C Initialize arrays - - CALL initialize(N, VX, VY, VZ, VX2, VY2, VZ2, P, FX, FY, FZ, - . AX, AY, AZ) - -C Apply the stencil a few times - - DO iter=1,niters - CALL cfdStencil(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . FX, FY, FZ, AX, AY, AZ) - END DO - - STOP - END - - SUBROUTINE cfdStencil(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . 
FX, FY, FZ, AX, AY, AZ) - - INTEGER N - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - PARAMETER ( delta_t = 0.001, recip_rho = 1.0e-3, - . eta = 1.0e-6, c1 = 0.1, c2 = 0.1 ) - - DO i=3,N-2 - DO j=3,N-2 - DO k=3,N-2 - VX2(i,j,k) = VX(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VX(i,j,k)-VX(i-2,j,k)+16*VX(i-1,j,k) - . +16*VX(i+1,j,k)-VX(i+2,j,k)-VX(i,j-2,k)+16*VX(i,j-1,k) - . +16*VX(i,j+1,k)-VX(i,j+2,k)-VX(i,j,k-2)+16*VX(i,j,k-1) - . +16*VX(i,j,k+1)-VX(i,j,k+2))+c2*(P(i-2,j,k) - . -8*P(i-1,j,k)+8*P(i+1,j,k)+P(i+2,j,k))+FX(i,j,k)) - . -AX(i,j,k)) - VY2(i,j,k) = VY(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VY(i,j,k)-VY(i-2,j,k)+16*VY(i-1,j,k) - . +16*VY(i+1,j,k)-VY(i+2,j,k)-VY(i,j-2,k)+16*VY(i,j-1,k) - . +16*VY(i,j+1,k)-VY(i,j+2,k)-VY(i,j,k-2)+16*VY(i,j,k-1) - . +16*VY(i,j,k+1)-VY(i,j,k+2))+c2*(P(i,j-2,k) - . -8*P(i,j-1,k)+8*P(i,j+1,k)+P(i,j+2,k))+FY(i,j,k)) - . -AY(i,j,k)) - VZ2(i,j,k) = VZ(i,j,k)+delta_t*(recip_rho*(eta* - . c1 * (-90*VZ(i,j,k)-VZ(i-2,j,k)+16*VZ(i-1,j,k) - . +16*VZ(i+1,j,k)-VZ(i+2,j,k)-VZ(i,j-2,k)+16*VZ(i,j-1,k) - . +16*VZ(i,j+1,k)-VZ(i,j+2,k)-VZ(i,j,k-2)+16*VZ(i,j,k-1) - . +16*VZ(i,j,k+1)-VZ(i,j,k+2))+c2*(P(i,j,k-2) - . -8*P(i,j,k-1)+8*P(i,j,k+1)+P(i,j,k+2))+FZ(i,j,k)) - . -AZ(i,j,k)) - END DO - END DO - END DO - - RETURN - END - - - - SUBROUTINE initialize(N, VX, VY, VZ, VX2, VY2, VZ2, P, - . 
FX, FY, FZ, AX, AY, AZ) - - INTEGER N - REAL VX(N,N,N), VY(N,N,N), VZ(N,N,N) - REAL VX2(N,N,N), VY2(N,N,N), VZ2(N,N,N) - REAL P(N,N,N) - REAL FX(N,N,N), FY(N,N,N), FZ(N,N,N) - REAL AX(N,N,N), AY(N,N,N), AZ(N,N,N) - - DO i=3,N-2 - DO j=3,N-2 - DO k=3,N-2 - VX(i,j,k) = 0 - VY(i,j,k) = 0 - VZ(i,j,k) = 0 - P(i,j,k) = 0 - FX(i,j,k) = 0 - FY(i,j,k) = 0 - FZ(i,j,k) = 0 - AX(i,j,k) = 0 - AY(i,j,k) = 0 - AZ(i,j,k) = 0 - END DO - END DO - END DO - - END - diff --git a/benchmarks/cfortran.h b/benchmarks/cfortran.h deleted file mode 100644 index ea0532eb..00000000 --- a/benchmarks/cfortran.h +++ /dev/null @@ -1,2090 +0,0 @@ -/* cfortran.h 3.5 */ /* anonymous ftp@zebra.desy.de */ -/* Burkhard Burow burow@desy.de 1990 - 1996. */ - -#ifndef __CFORTRAN_LOADED -#define __CFORTRAN_LOADED - -/* - THIS FILE IS PROPERTY OF BURKHARD BUROW. IF YOU ARE USING THIS FILE YOU - SHOULD ALSO HAVE ACCESS TO CFORTRAN.DOC WHICH PROVIDES TERMS FOR USING, - MODIFYING, COPYING AND DISTRIBUTING THE CFORTRAN.H PACKAGE. -*/ - -/* - Avoid symbols already used by compilers and system *.h: - __ - OSF1 zukal06 V3.0 347 alpha, cc -c -std1 cfortest.c - - */ - - -/* First prepare for the C compiler. */ - -#ifndef ANSI_C_preprocessor /* i.e. user can override. */ -#ifdef __CF__KnR -#define ANSI_C_preprocessor 0 -#else -#ifdef __STDC__ -#define ANSI_C_preprocessor 1 -#else -#define _cfleft 1 -#define _cfright -#define _cfleft_cfright 0 -#define ANSI_C_preprocessor _cfleft/**/_cfright -#endif -#endif -#endif - -#if ANSI_C_preprocessor -#define _0(A,B) A##B -#define _(A,B) _0(A,B) /* see cat,xcat of K&R ANSI C p. 231 */ -#define _2(A,B) A##B /* K&R ANSI C p.230: .. identifier is not replaced */ -#define _3(A,B,C) _(A,_(B,C)) -#else /* if it turns up again during rescanning. 
*/ -#define _(A,B) A/**/B -#define _2(A,B) A/**/B -#define _3(A,B,C) A/**/B/**/C -#endif - -#if (defined(vax)&&defined(unix)) || (defined(__vax__)&&defined(__unix__)) -#define VAXUltrix -#endif - -#include /* NULL [in all machines stdio.h] */ -#include /* strlen, memset, memcpy, memchr. */ -#if !( defined(VAXUltrix) || defined(sun) || (defined(apollo)&&!defined(__STDCPP__)) ) -#include /* malloc,free */ -#else -#include /* Had to be removed for DomainOS h105 10.4 sys5.3 425t*/ -#ifdef apollo -#define __CF__APOLLO67 /* __STDCPP__ is in Apollo 6.8 (i.e. ANSI) and onwards */ -#endif -#endif - -#if !defined(__GNUC__) && !defined(__sun) && (defined(sun)||defined(VAXUltrix)||defined(lynx)) -#define __CF__KnR /* Sun, LynxOS and VAX Ultrix cc only supports K&R. */ - /* Manually define __CF__KnR for HP if desired/required.*/ -#endif /* i.e. We will generate Kernighan and Ritchie C. */ -/* Note that you may define __CF__KnR before #include cfortran.h, in order to -generate K&R C instead of the default ANSI C. The differences are mainly in the -function prototypes and declarations. All machines, except the Apollo, work -with either style. The Apollo's argument promotion rules require ANSI or use of -the obsolete std_$call which we have not implemented here. Hence on the Apollo, -only C calling FORTRAN subroutines will work using K&R style.*/ - - -/* Remainder of cfortran.h depends on the Fortran compiler. */ - -#ifdef CLIPPERFortran -#define f2cFortran -#endif - -/* VAX/VMS does not let us \-split long #if lines. */ -/* Split #if into 2 because some HP-UX can't handle long #if */ -#if !(defined(NAGf90Fortran)||defined(f2cFortran)||defined(hpuxFortran)||defined(apolloFortran)||defined(sunFortran)||defined(IBMR2Fortran)||defined(CRAYFortran)) -#if !(defined(mipsFortran)||defined(DECFortran)||defined(vmsFortran)||defined(CONVEXFortran)||defined(PowerStationFortran)||defined(AbsoftUNIXFortran)) -/* If no Fortran compiler is given, we choose one for the machines we know. 
*/ -#if defined(lynx) || defined(VAXUltrix) -#define f2cFortran /* Lynx: Only support f2c at the moment. - VAXUltrix: f77 behaves like f2c. - Support f2c or f77 with gcc, vcc with f2c. - f77 with vcc works, missing link magic for f77 I/O.*/ -#endif -#if defined(__hpux) /* 921107: Use __hpux instead of __hp9000s300 */ -#define hpuxFortran /* Should also allow hp9000s7/800 use.*/ -#endif -#if defined(apollo) -#define apolloFortran /* __CF__APOLLO67 defines some behavior. */ -#endif -#if defined(sun) || defined(__sun) -#define sunFortran -#endif -#if defined(_IBMR2) -#define IBMR2Fortran -#endif -#if defined(_CRAY) -#define CRAYFortran /* _CRAY2 defines some behavior. */ -#endif -#if defined(mips) || defined(__mips) -#define mipsFortran -#endif -#if defined(vms) || defined(__vms) -#define vmsFortran -#endif -#if defined(__alpha) && defined(__unix__) -#define DECFortran -#endif -#if defined(__convex__) -#define CONVEXFortran -#endif -#if defined(VISUAL_CPLUSPLUS) -#define PowerStationFortran -#endif -#endif /* ...Fortran */ -#endif /* ...Fortran */ - -/* Split #if into 2 because some HP-UX can't handle long #if */ -#if !(defined(NAGf90Fortran)||defined(f2cFortran)||defined(hpuxFortran)||defined(apolloFortran)||defined(sunFortran)||defined(IBMR2Fortran)||defined(CRAYFortran)) -#if !(defined(mipsFortran)||defined(DECFortran)||defined(vmsFortran)||defined(CONVEXFortran)||defined(PowerStationFortran)||defined(AbsoftUNIXFortran)) -/* Apologies for the trigraph, but some compilers barf on #error. */ -??=error "cfortran.h: Can't find your environment among:\ - - MIPS cc and f77 2.0. (e.g. Silicon Graphics, DECstations, ...) \ - - IBM AIX XL C and FORTRAN Compiler/6000 Version 01.01.0000.0000 \ - - VAX VMS CC 3.1 and FORTRAN 5.4. \ - - Alpha VMS DEC C 1.3 and DEC FORTRAN 6.0. \ - - Alpha OSF DEC C and DEC Fortran for OSF/1 AXP Version 1.2 \ - - Apollo DomainOS 10.2 (sys5.3) with f77 10.7 and cc 6.7. 
\ - - CRAY \ - - CONVEX \ - - Sun \ - - PowerStation Fortran with Visual C++ \ - - HP9000s300/s700/s800 Latest test with: HP-UX A.08.07 A 9000/730 \ - - LynxOS: cc or gcc with f2c. \ - - VAXUltrix: vcc,cc or gcc with f2c. gcc or cc with f77. \ - - f77 with vcc works; but missing link magic for f77 I/O. \ - - NO fort. None of gcc, cc or vcc generate required names.\ - - f2c : Use #define f2cFortran, or cc -Df2cFortran \ - - NAG f90: Use #define NAGf90Fortran, or cc -DNAGf90Fortran \ - - Absoft UNIX F77: Use #define AbsoftUNIXFortran or cc -DAbsoftUNIXFortran" -/* Compiler must throw us out at this point! */ -#endif -#endif - - -#if defined(VAXC) && !defined(__VAXC) -#define OLD_VAXC -#pragma nostandard /* Prevent %CC-I-PARAMNOTUSED. */ -#endif - -/* Throughout cfortran.h we use: UN = Uppercase Name. LN = Lowercase Name. */ - -#if defined(f2cFortran) || defined(NAGf90Fortran) || defined(DECFortran) || defined(mipsFortran) || defined(apolloFortran) || defined(sunFortran) || defined(CONVEXFortran) || defined(extname) -#define CFC_(UN,LN) _(LN,_) /* Lowercase FORTRAN symbols. */ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) -#else -#if defined(CRAYFortran) || defined(PowerStationFortran) -#define CFC_(UN,LN) UN /* Uppercase FORTRAN symbols. */ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) /* CRAY insists on arg.'s here. */ -#else /* For following machines one may wish to change the fcallsc default. */ -#define CF_SAME_NAMESPACE -#ifdef vmsFortran -#define CFC_(UN,LN) LN /* Either case FORTRAN symbols. */ - /* BUT we usually use UN for C macro to FORTRAN routines, so use LN here,*/ - /* because VAX/VMS doesn't do recursive macros. */ -#define orig_fcallsc(UN,LN) UN -#else /* HP-UX without +ppu or IBMR2 without -qextname. NOT reccomended. */ -#define CFC_(UN,LN) LN /* Lowercase FORTRAN symbols. 
*/ -#define orig_fcallsc(UN,LN) CFC_(UN,LN) -#endif /* vmsFortran */ -#endif /* CRAYFortran */ -#endif /* ....Fortran */ - -#define fcallsc(UN,LN) orig_fcallsc(UN,LN) -#define preface_fcallsc(P,p,UN,LN) CFC_(_(P,UN),_(p,LN)) -#define append_fcallsc(P,p,UN,LN) CFC_(_(UN,P),_(LN,p)) - -#define C_FUNCTION(UN,LN) fcallsc(UN,LN) -#define FORTRAN_FUNCTION(UN,LN) CFC_(UN,LN) - -#ifndef COMMON_BLOCK -#ifndef CONVEXFortran -#ifndef CLIPPERFortran -#define COMMON_BLOCK(UN,LN) CFC_(UN,LN) -#else -#define COMMON_BLOCK(UN,LN) _(LN,__) -#endif -#else -#define COMMON_BLOCK(UN,LN) _3(_,LN,_) -#endif -#endif - -#ifdef CRAYFortran -#ifdef _CRAY -#include -#else -#include "fortran.h" /* i.e. if crosscompiling assume user has file. */ -#endif -#ifndef DOUBLE_PRECISION -#define DOUBLE_PRECISION long double -#endif -#define FLOATVVVVVVV_cfPP (float *) /* Used for C calls FORTRAN. */ -/* CRAY's double==float but CRAY says pointers to doubles and floats are diff.*/ -#define VOIDP (void *) /* When FORTRAN calls C, we don't know if C routine - arg.'s have been declared float *, or double *. */ -#else -#ifndef DOUBLE_PRECISION -#define DOUBLE_PRECISION double -#endif -#define FLOATVVVVVVV_cfPP -#define VOIDP -#endif - -#ifdef vmsFortran -#if defined(vms) || defined(__vms) -#include -#else -#include "descrip.h" /* i.e. if crosscompiling assume user has file. */ -#endif -#endif - -#ifdef sunFortran -#if defined(sun) || defined(__sun) -#include /* Sun's FLOATFUNCTIONTYPE, ASSIGNFLOAT, RETURNFLOAT. */ -#else -#include "math.h" /* i.e. if crosscompiling assume user has file. */ -#endif -/* At least starting with the default C compiler SC3.0.1 of SunOS 5.3, - * FLOATFUNCTIONTYPE, ASSIGNFLOAT, RETURNFLOAT are not required and not in - * , since sun C no longer promotes C float return values to doubles. - * Therefore, only use them if defined. - * Even if gcc is being used, assume that it exhibits the Sun C compiler - * behavior in order to be able to use *.o from the Sun C compiler. - * i.e. 
If FLOATFUNCTIONTYPE, etc. are in math.h, they required by gcc. - */ -#endif - -#ifndef apolloFortran -#define COMMON_BLOCK_DEF(DEFINITION, NAME) extern DEFINITION NAME -#define CF_NULL_PROTO -#else /* HP doesn't understand #elif. */ -/* Without ANSI prototyping, Apollo promotes float functions to double. */ -/* Note that VAX/VMS, IBM, Mips choke on 'type function(...);' prototypes. */ -#define CF_NULL_PROTO ... -#ifndef __CF__APOLLO67 -#define COMMON_BLOCK_DEF(DEFINITION, NAME) \ - DEFINITION NAME __attribute((__section(NAME))) -#else -#define COMMON_BLOCK_DEF(DEFINITION, NAME) \ - DEFINITION NAME #attribute[section(NAME)] -#endif -#endif - -#ifdef __cplusplus -#undef CF_NULL_PROTO -#define CF_NULL_PROTO ... -#endif - -#ifdef mipsFortran -#define CF_DECLARE_GETARG int f77argc; char **f77argv -#define CF_SET_GETARG(ARGC,ARGV) f77argc = ARGC; f77argv = ARGV -#else -#define CF_DECLARE_GETARG -#define CF_SET_GETARG(ARGC,ARGV) -#endif - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - -#define ACOMMA , -#define ACOLON ; - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES USED WITHIN CFORTRAN.H */ - -#define _cfMIN(A,B) (As) { /* Need this to handle NULL string.*/ - while (e>s && *--e==t); /* Don't follow t's past beginning. */ - e[*e==t?0:1] = '\0'; /* Handle s[0]=t correctly. */ -} return s; } - -/* kill_trailingn(s,t,e) will kill the trailing t's in string s. e normally -points to the terminating '\0' of s, but may actually point to anywhere in s. -s's new '\0' will be placed at e or earlier in order to remove any trailing t's. -If es) { /* Watch out for neg. length string.*/ - while (e>s && *--e==t); /* Don't follow t's past beginning. */ - e[*e==t?0:1] = '\0'; /* Handle s[0]=t correctly. */ -} return s; } - -/* Note the following assumes that any element which has t's to be chopped off, -does indeed fill the entire element. 
*/ -#ifndef __CF__KnR -static char *vkill_trailing(char* cstr, int elem_len, int sizeofcstr, char t) -#else -static char *vkill_trailing( cstr, elem_len, sizeofcstr, t) - char* cstr; int elem_len; int sizeofcstr; char t; -#endif -{ int i; -for (i=0; i= 4.3 gives message: - zow35> cc -c -DDECFortran cfortest.c - cfe: Fatal: Out of memory: cfortest.c - zow35> - Old __hpux had the problem, but new 'HP-UX A.09.03 A 9000/735' is fine - if using -Aa, otherwise we have a problem. - */ -#ifndef MAX_PREPRO_ARGS -#if !defined(__GNUC__) && (defined(VAXUltrix) || defined(__CF__APOLLO67) || (defined(sun)&&!defined(__sun)) || defined(_CRAY) || defined(__ultrix__) || (defined(__hpux)&&defined(__CF__KnR))) -#define MAX_PREPRO_ARGS 31 -#else -#define MAX_PREPRO_ARGS 99 -#endif -#endif - -#if defined(AbsoftUNIXFortran) -/* In addition to explicit Absoft stuff, only Absoft requires: - - DEFAULT coming from _cfSTR. - DEFAULT could have been called e.g. INT, but keep it for clarity. - - M term in CFARGT14 and CFARGT14FS. 
- */ -#define ABSOFT_cf1(T0) _(T0,_cfSTR)(0,ABSOFT1,0,0,0,0,0) -#define ABSOFT_cf2(T0) _(T0,_cfSTR)(0,ABSOFT2,0,0,0,0,0) -#define ABSOFT_cf3(T0) _(T0,_cfSTR)(0,ABSOFT3,0,0,0,0,0) -#define DEFAULT_cfABSOFT1 -#define LOGICAL_cfABSOFT1 -#define STRING_cfABSOFT1 ,MAX_LEN_FORTRAN_FUNCTION_STRING -#define DEFAULT_cfABSOFT2 -#define LOGICAL_cfABSOFT2 -#define STRING_cfABSOFT2 ,unsigned D0 -#define DEFAULT_cfABSOFT3 -#define LOGICAL_cfABSOFT3 -#define STRING_cfABSOFT3 ,D0 -#else -#define ABSOFT_cf1(T0) -#define ABSOFT_cf2(T0) -#define ABSOFT_cf3(T0) -#endif - -#define CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - S(T1,1) S(T2,2) S(T3,3) S(T4,4) S(T5,5) S(T6,6) S(T7,7) \ - S(T8,8) S(T9,9) S(TA,A) S(TB,B) S(TC,C) S(TD,D) S(TE,E) -#define CFARGT14FS(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - M CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) - -#if !(defined(PowerStationFortran)||defined(hpuxFortran800)) -/* Old CFARGT14 -> CFARGT14FS as seen below, for Absoft cross-compile yields: - SunOS> cc -c -Xa -DAbsoftUNIXFortran c.c - "c.c", line 406: warning: argument mismatch - Haven't checked if this is ANSI C or a SunOS bug. SunOS -Xs works ok. - Behavior is most clearly seen in example: - #define A 1 , 2 - #define C(X,Y,Z) x=X. y=Y. z=Z. - #define D(X,Y,Z) C(X,Y,Z) - D(x,A,z) - Output from preprocessor is: x = x . y = 1 . z = 2 . 
- #define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFARGT14FS(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -*/ -#define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - M CFARGT14S(S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CFARGT20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - F(T1,1,0) F(T2,2,1) F(T3,3,1) F(T4,4,1) F(T5,5,1) F(T6,6,1) F(T7,7,1) \ - F(T8,8,1) F(T9,9,1) F(TA,A,1) F(TB,B,1) F(TC,C,1) F(TD,D,1) F(TE,E,1) \ - F(TF,F,1) F(TG,G,1) F(TH,H,1) F(TI,I,1) F(TJ,J,1) F(TK,K,1) \ - S(T1,1) S(T2,2) S(T3,3) S(T4,4) S(T5,5) S(T6,6) S(T7,7) \ - S(T8,8) S(T9,9) S(TA,A) S(TB,B) S(TC,C) S(TD,D) S(TE,E) \ - S(TF,F) S(TG,G) S(TH,H) S(TI,I) S(TJ,J) S(TK,K) -#define CFARGTA14(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) \ - F(T1,A1,1,0) F(T2,A2,2,1) F(T3,A3,3,1) F(T4,A4,4,1) F(T5,A5,5,1) F(T6,A6,6,1) \ - F(T7,A7,7,1) F(T8,A8,8,1) F(T9,A9,9,1) F(TA,AA,A,1) F(TB,AB,B,1) F(TC,AC,C,1) \ - F(TD,AD,D,1) F(TE,AE,E,1) S(T1,1) S(T2,2) S(T3,3) S(T4,4) \ - S(T5,5) S(T6,6) S(T7,7) S(T8,8) S(T9,9) S(TA,A) \ - S(TB,B) S(TC,C) S(TD,D) S(TE,E) -#if MAX_PREPRO_ARGS>31 -#define CFARGTA20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - F(T1,A1,1,0) F(T2,A2,2,1) F(T3,A3,3,1) F(T4,A4,4,1) F(T5,A5,5,1) F(T6,A6,6,1) \ - F(T7,A7,7,1) F(T8,A8,8,1) F(T9,A9,9,1) F(TA,AA,A,1) F(TB,AB,B,1) F(TC,AC,C,1) \ - F(TD,AD,D,1) F(TE,AE,E,1) F(TF,AF,F,1) F(TG,AG,G,1) F(TH,AH,H,1) F(TI,AI,I,1) \ - F(TJ,AJ,J,1) F(TK,AK,K,1) S(T1,1) S(T2,2) S(T3,3) S(T4,4) \ - S(T5,5) S(T6,6) S(T7,7) S(T8,8) S(T9,9) S(TA,A) \ - S(TB,B) S(TC,C) S(TD,D) S(TE,E) S(TF,F) S(TG,G) \ - S(TH,H) S(TI,I) S(TJ,J) S(TK,K) -#endif -#else -#define CFARGT14(F,S,M,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) 
\ - F(T1,1,0) S(T1,1) F(T2,2,1) S(T2,2) F(T3,3,1) S(T3,3) F(T4,4,1) S(T4,4) \ - F(T5,5,1) S(T5,5) F(T6,6,1) S(T6,6) F(T7,7,1) S(T7,7) F(T8,8,1) S(T8,8) \ - F(T9,9,1) S(T9,9) F(TA,A,1) S(TA,A) F(TB,B,1) S(TB,B) F(TC,C,1) S(TC,C) \ - F(TD,D,1) S(TD,D) F(TE,E,1) S(TE,E) -#define CFARGT20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - F(T1,1,0) S(T1,1) F(T2,2,1) S(T2,2) F(T3,3,1) S(T3,3) F(T4,4,1) S(T4,4) \ - F(T5,5,1) S(T5,5) F(T6,6,1) S(T6,6) F(T7,7,1) S(T7,7) F(T8,8,1) S(T8,8) \ - F(T9,9,1) S(T9,9) F(TA,A,1) S(TA,A) F(TB,B,1) S(TB,B) F(TC,C,1) S(TC,C) \ - F(TD,D,1) S(TD,D) F(TE,E,1) S(TE,E) F(TF,F,1) S(TF,F) F(TG,G,1) S(TG,G) \ - F(TH,H,1) S(TH,H) F(TI,I,1) S(TI,I) F(TJ,J,1) S(TJ,J) F(TK,K,1) S(TK,K) -#define CFARGTA14(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) \ - F(T1,A1,1,0) S(T1,1) F(T2,A2,2,1) S(T2,2) F(T3,A3,3,1) S(T3,3) \ - F(T4,A4,4,1) S(T4,4) F(T5,A5,5,1) S(T5,5) F(T6,A6,6,1) S(T6,6) \ - F(T7,A7,7,1) S(T7,7) F(T8,A8,8,1) S(T8,8) F(T9,A9,9,1) S(T9,9) \ - F(TA,AA,A,1) S(TA,A) F(TB,AB,B,1) S(TB,B) F(TC,AC,C,1) S(TC,C) \ - F(TD,AD,D,1) S(TD,D) F(TE,AE,E,1) S(TE,E) -#if MAX_PREPRO_ARGS>31 -#define CFARGTA20(F,S,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - F(T1,A1,1,0) S(T1,1) F(T2,A2,2,1) S(T2,2) F(T3,A3,3,1) S(T3,3) \ - F(T4,A4,4,1) S(T4,4) F(T5,A5,5,1) S(T5,5) F(T6,A6,6,1) S(T6,6) \ - F(T7,A7,7,1) S(T7,7) F(T8,A8,8,1) S(T8,8) F(T9,A9,9,1) S(T9,9) \ - F(TA,AA,A,1) S(TA,A) F(TB,AB,B,1) S(TB,B) F(TC,AC,C,1) S(TC,C) \ - F(TD,AD,D,1) S(TD,D) F(TE,AE,E,1) S(TE,E) F(TF,AF,F,1) S(TF,F) \ - F(TG,AG,G,1) S(TG,G) F(TH,AH,H,1) S(TH,H) F(TI,AI,I,1) S(TI,I) \ - F(TJ,AJ,J,1) S(TJ,J) F(TK,AK,K,1) S(TK,K) -#endif -#endif - - -#define PROTOCCALLSFSUB1( UN,LN,T1) \ - PROTOCCALLSFSUB14(UN,LN,T1,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB2( UN,LN,T1,T2) \ - 
PROTOCCALLSFSUB14(UN,LN,T1,T2,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB3( UN,LN,T1,T2,T3) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB4( UN,LN,T1,T2,T3,T4) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB5( UN,LN,T1,T2,T3,T4,T5) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB6( UN,LN,T1,T2,T3,T4,T5,T6) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB7( UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define PROTOCCALLSFSUB13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - - -#define PROTOCCALLSFSUB15(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB16(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG) \ - 
PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB17(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,CF_0,CF_0,CF_0) -#define PROTOCCALLSFSUB18(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,CF_0,CF_0) -#define PROTOCCALLSFSUB19(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,CF_0) - - -#ifndef FCALLSC_QUALIFIER -#ifdef VISUAL_CPLUSPLUS -#define FCALLSC_QUALIFIER __stdcall -#else -#define FCALLSC_QUALIFIER -#endif -#endif - -#ifdef __cplusplus -#define CFextern extern "C" -#else -#define CFextern extern -#endif - - -#ifdef CFSUBASFUN -#define PROTOCCALLSFSUB0(UN,LN) \ - PROTOCCALLSFFUN0( VOID,UN,LN) -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFFUN14(VOID,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK)\ - PROTOCCALLSFFUN20(VOID,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#else -/* Note: Prevent compiler warnings, null #define PROTOCCALLSFSUB14/20 after - #include-ing cfortran.h if calling the FORTRAN wrapper within the same - source code where the wrapper is created. 
*/ -#define PROTOCCALLSFSUB0(UN,LN) CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)(); -#ifndef __CF__KnR -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)( CFARGT14(NCF,KCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ); -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK)\ - CFextern void FCALLSC_QUALIFIER CFC_(UN,LN)( CFARGT20(NCF,KCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) ); -#else -#define PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFSUB0(UN,LN) -#define PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - PROTOCCALLSFSUB0(UN,LN) -#endif -#endif - - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - - -#define CCALLSFSUB1( UN,LN,T1, A1) \ - CCALLSFSUB5 (UN,LN,T1,CF_0,CF_0,CF_0,CF_0,A1,0,0,0,0) -#define CCALLSFSUB2( UN,LN,T1,T2, A1,A2) \ - CCALLSFSUB5 (UN,LN,T1,T2,CF_0,CF_0,CF_0,A1,A2,0,0,0) -#define CCALLSFSUB3( UN,LN,T1,T2,T3, A1,A2,A3) \ - CCALLSFSUB5 (UN,LN,T1,T2,T3,CF_0,CF_0,A1,A2,A3,0,0) -#define CCALLSFSUB4( UN,LN,T1,T2,T3,T4, A1,A2,A3,A4)\ - CCALLSFSUB5 (UN,LN,T1,T2,T3,T4,CF_0,A1,A2,A3,A4,0) -#define CCALLSFSUB5( UN,LN,T1,T2,T3,T4,T5, A1,A2,A3,A4,A5) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,0,0,0,0,0) -#define CCALLSFSUB6( UN,LN,T1,T2,T3,T4,T5,T6, A1,A2,A3,A4,A5,A6) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,0,0,0,0) -#define CCALLSFSUB7( UN,LN,T1,T2,T3,T4,T5,T6,T7, A1,A2,A3,A4,A5,A6,A7) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,0,0,0) -#define CCALLSFSUB8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8, A1,A2,A3,A4,A5,A6,A7,A8) \ - CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,0,0) -#define CCALLSFSUB9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,A1,A2,A3,A4,A5,A6,A7,A8,A9)\ - 
CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,0) -#define CCALLSFSUB10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,0,0,0,0) -#define CCALLSFSUB11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,0,0,0) -#define CCALLSFSUB12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,0,0) -#define CCALLSFSUB13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD)\ - CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,0) - -#ifdef __cplusplus -#define CPPPROTOCLSFSUB0( UN,LN) -#define CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#else -#define CPPPROTOCLSFSUB0(UN,LN) \ - PROTOCCALLSFSUB0(UN,LN) -#define CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - PROTOCCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) -#define CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - PROTOCCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) -#endif - -#ifdef CFSUBASFUN -#define CCALLSFSUB0(UN,LN) CCALLSFFUN0(UN,LN) -#define CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) -#else -/* do{...}while(FALSE) allows if(a==b) FORT(); else BORT(); */ -#define CCALLSFSUB0( UN,LN) do{CPPPROTOCLSFSUB0(UN,LN) CFC_(UN,LN)();}while(FALSE) -#define 
CCALLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ -do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5) \ - VVCF(T6,A6,B6) VVCF(T7,A7,B7) VVCF(T8,A8,B8) VVCF(T9,A9,B9) VVCF(TA,AA,BA) \ - VVCF(TB,AB,BB) VVCF(TC,AC,BC) VVCF(TD,AD,BD) VVCF(TE,AE,BE) \ - CPPPROTOCLSFSUB14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - ACF(LN,T1,A1,1) ACF(LN,T2,A2,2) ACF(LN,T3,A3,3) \ - ACF(LN,T4,A4,4) ACF(LN,T5,A5,5) ACF(LN,T6,A6,6) ACF(LN,T7,A7,7) \ - ACF(LN,T8,A8,8) ACF(LN,T9,A9,9) ACF(LN,TA,AA,A) ACF(LN,TB,AB,B) \ - ACF(LN,TC,AC,C) ACF(LN,TD,AD,D) ACF(LN,TE,AE,E) \ - CFC_(UN,LN)( CFARGTA14(AACF,JCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE) );\ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) }while(FALSE) -#endif - - -#if MAX_PREPRO_ARGS>31 -#define CCALLSFSUB15(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,0,0,0,0,0) -#define CCALLSFSUB16(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,0,0,0,0) -#define CCALLSFSUB17(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,0,0,0) -#define CCALLSFSUB18(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI)\ - 
CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,0,0) -#define CCALLSFSUB19(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ)\ - CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,0) - -#ifdef CFSUBASFUN -#define CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ - CCALLSFFUN20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) -#else -#define CCALLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH, \ - TI,TJ,TK, A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) \ -do{VVCF(T1,A1,B1) VVCF(T2,A2,B2) VVCF(T3,A3,B3) VVCF(T4,A4,B4) VVCF(T5,A5,B5) \ - VVCF(T6,A6,B6) VVCF(T7,A7,B7) VVCF(T8,A8,B8) VVCF(T9,A9,B9) VVCF(TA,AA,BA) \ - VVCF(TB,AB,BB) VVCF(TC,AC,BC) VVCF(TD,AD,BD) VVCF(TE,AE,BE) VVCF(TF,AF,BF) \ - VVCF(TG,AG,BG) VVCF(TH,AH,BH) VVCF(TI,AI,BI) VVCF(TJ,AJ,BJ) VVCF(TK,AK,BK) \ - CPPPROTOCLSFSUB20(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK) \ - ACF(LN,T1,A1,1) ACF(LN,T2,A2,2) ACF(LN,T3,A3,3) ACF(LN,T4,A4,4) \ - ACF(LN,T5,A5,5) ACF(LN,T6,A6,6) ACF(LN,T7,A7,7) ACF(LN,T8,A8,8) \ - ACF(LN,T9,A9,9) ACF(LN,TA,AA,A) ACF(LN,TB,AB,B) ACF(LN,TC,AC,C) \ - ACF(LN,TD,AD,D) ACF(LN,TE,AE,E) ACF(LN,TF,AF,F) ACF(LN,TG,AG,G) \ - ACF(LN,TH,AH,H) ACF(LN,TI,AI,I) ACF(LN,TJ,AJ,J) ACF(LN,TK,AK,K) \ - CFC_(UN,LN)( CFARGTA20(AACF,JCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,TF,TG,TH,TI,TJ,TK,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE,AF,AG,AH,AI,AJ,AK) ); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) WCF(T6,A6,6) \ - WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) WCF(TB,AB,B) WCF(TC,AC,C) \ - 
WCF(TD,AD,D) WCF(TE,AE,E) WCF(TF,AF,F) WCF(TG,AG,G) WCF(TH,AH,H) WCF(TI,AI,I) \ - WCF(TJ,AJ,J) WCF(TK,AK,K) }while(FALSE) -#endif -#endif /* MAX_PREPRO_ARGS */ - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES FOR C TO CALL FORTRAN FUNCTIONS */ - -/*N.B. PROTOCCALLSFFUNn(..) generates code, whether or not the FORTRAN - function is called. Therefore, especially for creator's of C header files - for large FORTRAN libraries which include many functions, to reduce - compile time and object code size, it may be desirable to create - preprocessor directives to allow users to create code for only those - functions which they use. */ - -/* The following defines the maximum length string that a function can return. - Of course it may be undefine-d and re-define-d before individual - PROTOCCALLSFFUNn(..) as required. It would also be nice to have this derived - from the individual machines' limits. */ -#define MAX_LEN_FORTRAN_FUNCTION_STRING 0x4FE - -/* The following defines a character used by CFORTRAN.H to flag the end of a - string coming out of a FORTRAN routine. */ -#define CFORTRAN_NON_CHAR 0x7F - -#ifdef OLD_VAXC /* Prevent %CC-I-PARAMNOTUSED. 
*/ -#pragma nostandard -#endif - -#define _SEP_(TN,C,COMMA) _(__SEP_,C)(TN,COMMA) -#define __SEP_0(TN,COMMA) -#define __SEP_1(TN,COMMA) _Icf(2,SEP,TN,COMMA,0) -#define INT_cfSEP(T,B) _(A,B) -#define INTV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define INTVVVVVVV_cfSEP(T,B) INT_cfSEP(T,B) -#define PINT_cfSEP(T,B) INT_cfSEP(T,B) -#define PVOID_cfSEP(T,B) INT_cfSEP(T,B) -#define ROUTINE_cfSEP(T,B) INT_cfSEP(T,B) -#define SIMPLE_cfSEP(T,B) INT_cfSEP(T,B) -#define VOID_cfSEP(T,B) INT_cfSEP(T,B) /* For FORTRAN calls C subr.s.*/ -#define STRING_cfSEP(T,B) INT_cfSEP(T,B) -#define STRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define PSTRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PNSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define PPSTRING_cfSEP(T,B) INT_cfSEP(T,B) -#define ZTRINGV_cfSEP(T,B) INT_cfSEP(T,B) -#define PZTRINGV_cfSEP(T,B) INT_cfSEP(T,B) - -#if defined(SIGNED_BYTE) || !defined(UNSIGNED_BYTE) -#ifdef OLD_VAXC -#define INTEGER_BYTE char /* Old VAXC barfs on 'signed char' */ -#else -#define INTEGER_BYTE signed char /* default */ -#endif -#else -#define INTEGER_BYTE unsigned char -#endif -#define BYTEVVVVVVV_cfTYPE INTEGER_BYTE -#define DOUBLEVVVVVVV_cfTYPE DOUBLE_PRECISION -#define FLOATVVVVVVV_cfTYPE float -#define INTVVVVVVV_cfTYPE int -#define LOGICALVVVVVVV_cfTYPE int -#define LONGVVVVVVV_cfTYPE long -#define SHORTVVVVVVV_cfTYPE short -#define PBYTE_cfTYPE INTEGER_BYTE -#define PDOUBLE_cfTYPE DOUBLE_PRECISION -#define PFLOAT_cfTYPE float -#define PINT_cfTYPE int -#define PLOGICAL_cfTYPE int -#define PLONG_cfTYPE long -#define PSHORT_cfTYPE short - -#define CFARGS0(A,T,V,W,X,Y,Z) _3(T,_cf,A) -#define CFARGS1(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V) -#define CFARGS2(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W) -#define CFARGS3(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X) 
-#define CFARGS4(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X,Y) -#define CFARGS5(A,T,V,W,X,Y,Z) _3(T,_cf,A)(V,W,X,Y,Z) - -#define _Icf(N,T,I,X,Y) _(I,_cfINT)(N,T,I,X,Y,0) -#define _Icf4(N,T,I,X,Y,Z) _(I,_cfINT)(N,T,I,X,Y,Z) -#define BYTE_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define DOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INT,B,X,Y,Z,0) -#define FLOAT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define INT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define LOGICAL_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define LONG_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define SHORT_cfINT(N,A,B,X,Y,Z) DOUBLE_cfINT(N,A,B,X,Y,Z) -#define PBYTE_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PDOUBLE_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,PINT,B,X,Y,Z,0) -#define PFLOAT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PINT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PLOGICAL_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PLONG_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define PSHORT_cfINT(N,A,B,X,Y,Z) PDOUBLE_cfINT(N,A,B,X,Y,Z) -#define BYTEV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define BYTEVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define BYTEVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define DOUBLEV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTV,B,X,Y,Z,0) -#define DOUBLEVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVV,B,X,Y,Z,0) -#define DOUBLEVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVV,B,X,Y,Z,0) -#define DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVV,B,X,Y,Z,0) -#define DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVV,B,X,Y,Z,0) -#define DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVVV,B,X,Y,Z,0) -#define 
DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,INTVVVVVVV,B,X,Y,Z,0) -#define FLOATV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define FLOATVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define FLOATVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define INTVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define INTVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define INTVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LOGICALVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define LONGVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define LONGVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define 
SHORTV_cfINT(N,A,B,X,Y,Z) DOUBLEV_cfINT(N,A,B,X,Y,Z) -#define SHORTVV_cfINT(N,A,B,X,Y,Z) DOUBLEVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVV_cfINT(N,A,B,X,Y,Z) -#define SHORTVVVVVVV_cfINT(N,A,B,X,Y,Z) DOUBLEVVVVVVV_cfINT(N,A,B,X,Y,Z) -#define PVOID_cfINT(N,A,B,X,Y,Z) _(CFARGS,N)(A,B,B,X,Y,Z,0) -#define ROUTINE_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -/*CRAY coughs on the first, - i.e. the usual trouble of not being able to - define macros to macros with arguments. - New ultrix is worse, it coughs on all such uses. - */ -/*#define SIMPLE_cfINT PVOID_cfINT*/ -#define SIMPLE_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define VOID_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define STRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define STRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PSTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PNSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PPSTRING_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define ZTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define PZTRINGV_cfINT(N,A,B,X,Y,Z) PVOID_cfINT(N,A,B,X,Y,Z) -#define CF_0_cfINT(N,A,B,X,Y,Z) - - -#define UCF(TN,I,C) _SEP_(TN,C,COMMA) _Icf(2,U,TN,_(A,I),0) -#define UUCF(TN,I,C) _SEP_(TN,C,COMMA) _SEP_(TN,1,I) -#define UUUCF(TN,I,C) _SEP_(TN,C,COLON) _Icf(2,U,TN,_(A,I),0) -#define INT_cfU(T,A) _(T,VVVVVVV_cfTYPE) A -#define INTV_cfU(T,A) _(T,VVVVVV_cfTYPE) * A -#define INTVV_cfU(T,A) _(T,VVVVV_cfTYPE) * A -#define INTVVV_cfU(T,A) _(T,VVVV_cfTYPE) * A -#define INTVVVV_cfU(T,A) _(T,VVV_cfTYPE) * A -#define INTVVVVV_cfU(T,A) _(T,VV_cfTYPE) * A -#define INTVVVVVV_cfU(T,A) _(T,V_cfTYPE) * A -#define INTVVVVVVV_cfU(T,A) _(T,_cfTYPE) * A -#define 
PINT_cfU(T,A) _(T,_cfTYPE) * A -#define PVOID_cfU(T,A) void *A -#define ROUTINE_cfU(T,A) void (*A)() -#define VOID_cfU(T,A) void A /* Needed for C calls FORTRAN sub.s. */ -#define STRING_cfU(T,A) char *A /* via VOID and wrapper. */ -#define STRINGV_cfU(T,A) char *A -#define PSTRING_cfU(T,A) char *A -#define PSTRINGV_cfU(T,A) char *A -#define ZTRINGV_cfU(T,A) char *A -#define PZTRINGV_cfU(T,A) char *A - -/* VOID breaks U into U and UU. */ -#define INT_cfUU(T,A) _(T,VVVVVVV_cfTYPE) A -#define VOID_cfUU(T,A) /* Needed for FORTRAN calls C sub.s. */ -#define STRING_cfUU(T,A) char *A - - -#define BYTE_cfPU(A) CFextern INTEGER_BYTE FCALLSC_QUALIFIER A -#define DOUBLE_cfPU(A) CFextern DOUBLE_PRECISION FCALLSC_QUALIFIER A -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfPU(A) CFextern float FCALLSC_QUALIFIER A -#else -#define FLOAT_cfPU(A) CFextern FLOATFUNCTIONTYPE FCALLSC_QUALIFIER A -#endif -#define INT_cfPU(A) CFextern int FCALLSC_QUALIFIER A -#define LOGICAL_cfPU(A) CFextern int FCALLSC_QUALIFIER A -#define LONG_cfPU(A) CFextern long FCALLSC_QUALIFIER A -#define SHORT_cfPU(A) CFextern short FCALLSC_QUALIFIER A -#define STRING_cfPU(A) CFextern void FCALLSC_QUALIFIER A -#define VOID_cfPU(A) CFextern void FCALLSC_QUALIFIER A - -#define BYTE_cfE INTEGER_BYTE A0; -#define DOUBLE_cfE DOUBLE_PRECISION A0; -#if ! 
(defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfE float A0; -#else -#define FLOAT_cfE float AA0; FLOATFUNCTIONTYPE A0; -#endif -#define INT_cfE int A0; -#define LOGICAL_cfE int A0; -#define LONG_cfE long A0; -#define SHORT_cfE short A0; -#define VOID_cfE -#ifdef vmsFortran -#define STRING_cfE static char AA0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - static fstring A0 = \ - {MAX_LEN_FORTRAN_FUNCTION_STRING,DSC$K_DTYPE_T,DSC$K_CLASS_S,AA0};\ - memset(AA0, CFORTRAN_NON_CHAR, MAX_LEN_FORTRAN_FUNCTION_STRING);\ - *(AA0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0'; -#else -#ifdef CRAYFortran -#define STRING_cfE static char AA0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - static _fcd A0; *(AA0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0';\ - memset(AA0,CFORTRAN_NON_CHAR, MAX_LEN_FORTRAN_FUNCTION_STRING);\ - A0 = _cptofcd(AA0,MAX_LEN_FORTRAN_FUNCTION_STRING); -#else -/* 'cc: SC3.0.1 13 Jul 1994' barfs on char A0[0x4FE+1]; - * char A0[0x4FE +1]; char A0[1+0x4FE]; are both OK. */ -#define STRING_cfE static char A0[1+MAX_LEN_FORTRAN_FUNCTION_STRING]; \ - memset(A0, CFORTRAN_NON_CHAR, \ - MAX_LEN_FORTRAN_FUNCTION_STRING); \ - *(A0+MAX_LEN_FORTRAN_FUNCTION_STRING)='\0'; -#endif -#endif -/* ESTRING must use static char. array which is guaranteed to exist after - function returns. */ - -/* N.B.i) The diff. for 0 (Zero) and >=1 arguments. - ii)That the following create an unmatched bracket, i.e. '(', which - must of course be matched in the call. 
- iii)Commas must be handled very carefully */ -#define INT_cfGZ(T,UN,LN) A0=CFC_(UN,LN)( -#define VOID_cfGZ(T,UN,LN) CFC_(UN,LN)( -#ifdef vmsFortran -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)(&A0 -#else -#if defined(CRAYFortran) || defined(AbsoftUNIXFortran) -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)( A0 -#else -#define STRING_cfGZ(T,UN,LN) CFC_(UN,LN)( A0,MAX_LEN_FORTRAN_FUNCTION_STRING -#endif -#endif - -#define INT_cfG(T,UN,LN) INT_cfGZ(T,UN,LN) -#define VOID_cfG(T,UN,LN) VOID_cfGZ(T,UN,LN) -#define STRING_cfG(T,UN,LN) STRING_cfGZ(T,UN,LN), /*, is only diff. from _cfG*/ - -#define BYTEVVVVVVV_cfPP -#define INTVVVVVVV_cfPP /* These complement FLOATVVVVVVV_cfPP. */ -#define DOUBLEVVVVVVV_cfPP -#define LOGICALVVVVVVV_cfPP -#define LONGVVVVVVV_cfPP -#define SHORTVVVVVVV_cfPP -#define PBYTE_cfPP -#define PINT_cfPP -#define PDOUBLE_cfPP -#define PLOGICAL_cfPP -#define PLONG_cfPP -#define PSHORT_cfPP -#define PFLOAT_cfPP FLOATVVVVVVV_cfPP - -#define BCF(TN,AN,C) _SEP_(TN,C,COMMA) _Icf(2,B,TN,AN,0) -#define INT_cfB(T,A) (_(T,VVVVVVV_cfTYPE)) A -#define INTV_cfB(T,A) A -#define INTVV_cfB(T,A) (A)[0] -#define INTVVV_cfB(T,A) (A)[0][0] -#define INTVVVV_cfB(T,A) (A)[0][0][0] -#define INTVVVVV_cfB(T,A) (A)[0][0][0][0] -#define INTVVVVVV_cfB(T,A) (A)[0][0][0][0][0] -#define INTVVVVVVV_cfB(T,A) (A)[0][0][0][0][0][0] -#define PINT_cfB(T,A) _(T,_cfPP)&A -#define STRING_cfB(T,A) (char *) A -#define STRINGV_cfB(T,A) (char *) A -#define PSTRING_cfB(T,A) (char *) A -#define PSTRINGV_cfB(T,A) (char *) A -#define PVOID_cfB(T,A) (void *) A -#define ROUTINE_cfB(T,A) (void(*)())A -#define ZTRINGV_cfB(T,A) (char *) A -#define PZTRINGV_cfB(T,A) (char *) A - -#define SCF(TN,NAME,I,A) _(TN,_cfSTR)(3,S,NAME,I,A,0,0) -#define DEFAULT_cfS(M,I,A) -#define LOGICAL_cfS(M,I,A) -#define PLOGICAL_cfS(M,I,A) -#define STRING_cfS(M,I,A) ,sizeof(A) -#define STRINGV_cfS(M,I,A) ,( (unsigned)0xFFFF*firstindexlength(A) \ - +secondindexlength(A)) -#define PSTRING_cfS(M,I,A) ,sizeof(A) -#define 
PSTRINGV_cfS(M,I,A) STRINGV_cfS(M,I,A) -#define ZTRINGV_cfS(M,I,A) -#define PZTRINGV_cfS(M,I,A) - -#define HCF(TN,I) _(TN,_cfSTR)(3,H,COMMA, H,_(C,I),0,0) -#define HHCF(TN,I) _(TN,_cfSTR)(3,H,COMMA,HH,_(C,I),0,0) -#define HHHCF(TN,I) _(TN,_cfSTR)(3,H,COLON, H,_(C,I),0,0) -#define H_CF_SPECIAL unsigned -#define HH_CF_SPECIAL -#define DEFAULT_cfH(M,I,A) -#define LOGICAL_cfH(S,U,B) -#define PLOGICAL_cfH(S,U,B) -#define STRING_cfH(S,U,B) _(A,S) _(U,_CF_SPECIAL) B -#define STRINGV_cfH(S,U,B) STRING_cfH(S,U,B) -#define PSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define PSTRINGV_cfH(S,U,B) STRING_cfH(S,U,B) -#define PNSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define PPSTRING_cfH(S,U,B) STRING_cfH(S,U,B) -#define ZTRINGV_cfH(S,U,B) -#define PZTRINGV_cfH(S,U,B) - -/* Need VOID_cfSTR because Absoft forced function types go through _cfSTR. */ -/* No spaces inside expansion. They screws up macro catenation kludge. */ -#define VOID_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOAT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define BYTEVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define 
DOUBLEV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define DOUBLEVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define DOUBLEVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define FLOATVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define FLOATVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define INTVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define INTVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LOGICALVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LOGICALVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define 
LOGICALVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define LONGVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define LONGVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,LOGICAL,A,B,C,D,E) -#define SHORTVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SHORTVVVVVVV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PBYTE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PDOUBLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PFLOAT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PINT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PLOGICAL_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PLOGICAL,A,B,C,D,E) -#define PLONG_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define PSHORT_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define STRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRING,A,B,C,D,E) -#define PSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRING,A,B,C,D,E) -#define STRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,STRINGV,A,B,C,D,E) -#define PSTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PSTRINGV,A,B,C,D,E) -#define PNSTRING_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PNSTRING,A,B,C,D,E) -#define PPSTRING_cfSTR(N,T,A,B,C,D,E) 
_(CFARGS,N)(T,PPSTRING,A,B,C,D,E) -#define PVOID_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define ROUTINE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define SIMPLE_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,DEFAULT,A,B,C,D,E) -#define ZTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,ZTRINGV,A,B,C,D,E) -#define PZTRINGV_cfSTR(N,T,A,B,C,D,E) _(CFARGS,N)(T,PZTRINGV,A,B,C,D,E) -#define CF_0_cfSTR(N,T,A,B,C,D,E) - -/* See ACF table comments, which explain why CCF was split into two. */ -#define CCF(NAME,TN,I) _(TN,_cfSTR)(5,C,NAME,I,_(A,I),_(B,I),_(C,I)) -#define DEFAULT_cfC(M,I,A,B,C) -#define LOGICAL_cfC(M,I,A,B,C) A=C2FLOGICAL( A); -#define PLOGICAL_cfC(M,I,A,B,C) *A=C2FLOGICAL(*A); -#ifdef vmsFortran -#define STRING_cfC(M,I,A,B,C) (B.clen=strlen(A),B.f.dsc$a_pointer=A, \ - C==sizeof(char*)||C==(unsigned)(B.clen+1)?B.f.dsc$w_length=B.clen: \ - (memset((A)+B.clen,' ',C-B.clen-1),A[B.f.dsc$w_length=C-1]='\0')); - /* PSTRING_cfC to beware of array A which does not contain any \0. */ -#define PSTRING_cfC(M,I,A,B,C) (B.dsc$a_pointer=A, C==sizeof(char*) ? \ - B.dsc$w_length=strlen(A): (A[C-1]='\0',B.dsc$w_length=strlen(A), \ - memset((A)+B.dsc$w_length,' ',C-B.dsc$w_length-1), B.dsc$w_length=C-1)); -#else -#define STRING_cfC(M,I,A,B,C) (B.clen=strlen(A), \ - C==sizeof(char*)||C==(unsigned)(B.clen+1)?B.flen=B.clen: \ - (memset((A)+B.clen,' ',C-B.clen-1),A[B.flen=C-1]='\0')); -#define PSTRING_cfC(M,I,A,B,C) (C==sizeof(char*)? B=strlen(A): \ - (A[C-1]='\0',B=strlen(A),memset((A)+B,' ',C-B-1),B=C-1)); -#endif - /* For CRAYFortran for (P)STRINGV_cfC, B.fs is set, but irrelevant. 
*/ -#define STRINGV_cfC(M,I,A,B,C) \ - AATRINGV_cfA( A,B,(C/0xFFFF)*(C%0xFFFF),C/0xFFFF,C%0xFFFF) -#define PSTRINGV_cfC(M,I,A,B,C) \ - APATRINGV_cfA( A,B,(C/0xFFFF)*(C%0xFFFF),C/0xFFFF,C%0xFFFF) -#define ZTRINGV_cfC(M,I,A,B,C) \ - AATRINGV_cfA( A,B, (_3(M,_ELEMS_,I))*((_3(M,_ELEMLEN_,I))+1), \ - (_3(M,_ELEMS_,I)), (_3(M,_ELEMLEN_,I))+1 ) -#define PZTRINGV_cfC(M,I,A,B,C) \ - APATRINGV_cfA( A,B, (_3(M,_ELEMS_,I))*((_3(M,_ELEMLEN_,I))+1), \ - (_3(M,_ELEMS_,I)), (_3(M,_ELEMLEN_,I))+1 ) - -#define BYTE_cfCCC(A,B) &A -#define DOUBLE_cfCCC(A,B) &A -#if !defined(__CF__KnR) -#define FLOAT_cfCCC(A,B) &A - /* Although the VAX doesn't, at least the */ -#else /* HP and K&R mips promote float arg.'s of */ -#define FLOAT_cfCCC(A,B) &B /* unprototyped functions to double. Cannot */ -#endif /* use A here to pass the argument to FORTRAN. */ -#define INT_cfCCC(A,B) &A -#define LOGICAL_cfCCC(A,B) &A -#define LONG_cfCCC(A,B) &A -#define SHORT_cfCCC(A,B) &A -#define PBYTE_cfCCC(A,B) A -#define PDOUBLE_cfCCC(A,B) A -#define PFLOAT_cfCCC(A,B) A -#define PINT_cfCCC(A,B) A -#define PLOGICAL_cfCCC(A,B) B=A /* B used to keep a common W table. 
*/ -#define PLONG_cfCCC(A,B) A -#define PSHORT_cfCCC(A,B) A - -#define CCCF(TN,I,M) _SEP_(TN,M,COMMA) _Icf(3,CC,TN,_(A,I),_(B,I)) -#define INT_cfCC(T,A,B) _(T,_cfCCC)(A,B) -#define INTV_cfCC(T,A,B) A -#define INTVV_cfCC(T,A,B) A -#define INTVVV_cfCC(T,A,B) A -#define INTVVVV_cfCC(T,A,B) A -#define INTVVVVV_cfCC(T,A,B) A -#define INTVVVVVV_cfCC(T,A,B) A -#define INTVVVVVVV_cfCC(T,A,B) A -#define PINT_cfCC(T,A,B) _(T,_cfCCC)(A,B) -#define PVOID_cfCC(T,A,B) A -#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran) -#define ROUTINE_cfCC(T,A,B) &A -#else -#define ROUTINE_cfCC(T,A,B) A -#endif -#define SIMPLE_cfCC(T,A,B) A -#ifdef vmsFortran -#define STRING_cfCC(T,A,B) &B.f -#define STRINGV_cfCC(T,A,B) &B -#define PSTRING_cfCC(T,A,B) &B -#define PSTRINGV_cfCC(T,A,B) &B -#else -#ifdef CRAYFortran -#define STRING_cfCC(T,A,B) _cptofcd(A,B.flen) -#define STRINGV_cfCC(T,A,B) _cptofcd(B.s,B.flen) -#define PSTRING_cfCC(T,A,B) _cptofcd(A,B) -#define PSTRINGV_cfCC(T,A,B) _cptofcd(A,B.flen) -#else -#define STRING_cfCC(T,A,B) A -#define STRINGV_cfCC(T,A,B) B.fs -#define PSTRING_cfCC(T,A,B) A -#define PSTRINGV_cfCC(T,A,B) B.fs -#endif -#endif -#define ZTRINGV_cfCC(T,A,B) STRINGV_cfCC(T,A,B) -#define PZTRINGV_cfCC(T,A,B) PSTRINGV_cfCC(T,A,B) - -#define BYTE_cfX return A0; -#define DOUBLE_cfX return A0; -#if ! 
(defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfX return A0; -#else -#define FLOAT_cfX ASSIGNFLOAT(AA0,A0); return AA0; -#endif -#define INT_cfX return A0; -#define LOGICAL_cfX return F2CLOGICAL(A0); -#define LONG_cfX return A0; -#define SHORT_cfX return A0; -#define VOID_cfX return ; -#if defined(vmsFortran) || defined(CRAYFortran) -#define STRING_cfX return kill_trailing( \ - kill_trailing(AA0,CFORTRAN_NON_CHAR),' '); -#else -#define STRING_cfX return kill_trailing( \ - kill_trailing( A0,CFORTRAN_NON_CHAR),' '); -#endif - -#define CFFUN(NAME) _(__cf__,NAME) - -/* Note that we don't use LN here, but we keep it for consistency. */ -#define CCALLSFFUN0(UN,LN) CFFUN(UN)() - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. */ -#pragma standard -#endif - -#define CCALLSFFUN1( UN,LN,T1, A1) \ - CCALLSFFUN5 (UN,LN,T1,CF_0,CF_0,CF_0,CF_0,A1,0,0,0,0) -#define CCALLSFFUN2( UN,LN,T1,T2, A1,A2) \ - CCALLSFFUN5 (UN,LN,T1,T2,CF_0,CF_0,CF_0,A1,A2,0,0,0) -#define CCALLSFFUN3( UN,LN,T1,T2,T3, A1,A2,A3) \ - CCALLSFFUN5 (UN,LN,T1,T2,T3,CF_0,CF_0,A1,A2,A3,0,0) -#define CCALLSFFUN4( UN,LN,T1,T2,T3,T4, A1,A2,A3,A4)\ - CCALLSFFUN5 (UN,LN,T1,T2,T3,T4,CF_0,A1,A2,A3,A4,0) -#define CCALLSFFUN5( UN,LN,T1,T2,T3,T4,T5, A1,A2,A3,A4,A5) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,0,0,0,0,0) -#define CCALLSFFUN6( UN,LN,T1,T2,T3,T4,T5,T6, A1,A2,A3,A4,A5,A6) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,0,0,0,0) -#define CCALLSFFUN7( UN,LN,T1,T2,T3,T4,T5,T6,T7, A1,A2,A3,A4,A5,A6,A7) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,0,0,0) -#define CCALLSFFUN8( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8, A1,A2,A3,A4,A5,A6,A7,A8) \ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,0,0) -#define CCALLSFFUN9( UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,A1,A2,A3,A4,A5,A6,A7,A8,A9)\ - CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,0) 
-#define CCALLSFFUN10(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,0,0,0,0) -#define CCALLSFFUN11(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,0,0,0) -#define CCALLSFFUN12(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,0,0) -#define CCALLSFFUN13(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD)\ - CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,0) - -#define CCALLSFFUN14(UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE,A1,A2,A3,A4,A5,A6,A7,A8,A9,AA,AB,AC,AD,AE)\ -((CFFUN(UN)( BCF(T1,A1,0) BCF(T2,A2,1) BCF(T3,A3,1) BCF(T4,A4,1) BCF(T5,A5,1) \ - BCF(T6,A6,1) BCF(T7,A7,1) BCF(T8,A8,1) BCF(T9,A9,1) BCF(TA,AA,1) \ - BCF(TB,AB,1) BCF(TC,AC,1) BCF(TD,AD,1) BCF(TE,AE,1) \ - SCF(T1,LN,1,A1) SCF(T2,LN,2,A2) SCF(T3,LN,3,A3) SCF(T4,LN,4,A4) \ - SCF(T5,LN,5,A5) SCF(T6,LN,6,A6) SCF(T7,LN,7,A7) SCF(T8,LN,8,A8) \ - SCF(T9,LN,9,A9) SCF(TA,LN,A,AA) SCF(TB,LN,B,AB) SCF(TC,LN,C,AC) \ - SCF(TD,LN,D,AD)))) - -/* N.B. Create a separate function instead of using (call function, function -value here) because in order to create the variables needed for the input -arg.'s which may be const.'s one has to do the creation within {}, but these -can never be placed within ()'s. Therefore one must create wrapper functions. -gcc, on the other hand may be able to avoid the wrapper functions. */ - -/* Prototypes are needed to correctly handle the value returned correctly. N.B. -Can only have prototype arg.'s with difficulty, a la G... table since FORTRAN -functions returning strings have extra arg.'s. 
Don't bother, since this only -causes a compiler warning to come up when one uses FCALLSCFUNn and CCALLSFFUNn -for the same function in the same source code. Something done by the experts in -debugging only.*/ - -#define PROTOCCALLSFFUN0(F,UN,LN) \ -_(F,_cfPU)( CFC_(UN,LN))(CF_NULL_PROTO); \ -static _Icf(2,U,F,CFFUN(UN),0)() {_(F,_cfE) _Icf(3,GZ,F,UN,LN) ABSOFT_cf1(F));_(F,_cfX)} - -#define PROTOCCALLSFFUN1( T0,UN,LN,T1) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN2( T0,UN,LN,T1,T2) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN3( T0,UN,LN,T1,T2,T3) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,T3,CF_0,CF_0) -#define PROTOCCALLSFFUN4( T0,UN,LN,T1,T2,T3,T4) \ - PROTOCCALLSFFUN5 (T0,UN,LN,T1,T2,T3,T4,CF_0) -#define PROTOCCALLSFFUN5( T0,UN,LN,T1,T2,T3,T4,T5) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN6( T0,UN,LN,T1,T2,T3,T4,T5,T6) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN7( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN8( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0) -#define PROTOCCALLSFFUN9( T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0) -#define PROTOCCALLSFFUN10(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN11(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define PROTOCCALLSFFUN12(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define PROTOCCALLSFFUN13(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - 
PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - -/* HP/UX 9.01 cc requires the blank between '_Icf(3,G,T0,UN,LN) CCCF(T1,1,0)' */ - -#ifndef __CF__KnR -#define PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _(T0,_cfPU)(CFC_(UN,LN))(CF_NULL_PROTO); static _Icf(2,U,T0,CFFUN(UN),0)( \ - CFARGT14FS(UCF,HCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ) \ -{ CFARGT14S(VCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfE) \ - CCF(LN,T1,1) CCF(LN,T2,2) CCF(LN,T3,3) CCF(LN,T4,4) CCF(LN,T5,5) \ - CCF(LN,T6,6) CCF(LN,T7,7) CCF(LN,T8,8) CCF(LN,T9,9) CCF(LN,TA,A) \ - CCF(LN,TB,B) CCF(LN,TC,C) CCF(LN,TD,D) CCF(LN,TE,E) _Icf(3,G,T0,UN,LN) \ - CFARGT14(CCCF,JCF,ABSOFT_cf1(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) _(T0,_cfX)} -#else -#define PROTOCCALLSFFUN14(T0,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _(T0,_cfPU)(CFC_(UN,LN))(CF_NULL_PROTO); static _Icf(2,U,T0,CFFUN(UN),0)( \ - CFARGT14FS(UUCF,HHCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ) \ - CFARGT14FS(UUUCF,HHHCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) ; \ -{ CFARGT14S(VCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfE) \ - CCF(LN,T1,1) CCF(LN,T2,2) CCF(LN,T3,3) CCF(LN,T4,4) CCF(LN,T5,5) \ - CCF(LN,T6,6) CCF(LN,T7,7) CCF(LN,T8,8) CCF(LN,T9,9) CCF(LN,TA,A) \ - CCF(LN,TB,B) CCF(LN,TC,C) CCF(LN,TD,D) CCF(LN,TE,E) _Icf(3,G,T0,UN,LN) \ - CFARGT14(CCCF,JCF,ABSOFT_cf1(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)); \ - WCF(T1,A1,1) WCF(T2,A2,2) WCF(T3,A3,3) WCF(T4,A4,4) WCF(T5,A5,5) \ - WCF(T6,A6,6) WCF(T7,A7,7) WCF(T8,A8,8) WCF(T9,A9,9) WCF(TA,AA,A) \ - WCF(TB,AB,B) WCF(TC,AC,C) WCF(TD,AD,D) WCF(TE,AE,E) _(T0,_cfX)} -#endif - -/*-------------------------------------------------------------------------*/ - -/* UTILITIES FOR FORTRAN TO CALL C ROUTINES */ - 
-#ifdef OLD_VAXC /* Prevent %CC-I-PARAMNOTUSED. */ -#pragma nostandard -#endif - -#if defined(vmsFortran) || defined(CRAYFortran) -#define DCF(TN,I) -#define DDCF(TN,I) -#define DDDCF(TN,I) -#else -#define DCF(TN,I) HCF(TN,I) -#define DDCF(TN,I) HHCF(TN,I) -#define DDDCF(TN,I) HHHCF(TN,I) -#endif - -#define QCF(TN,I) _(TN,_cfSTR)(1,Q,_(B,I), 0,0,0,0) -#define DEFAULT_cfQ(B) -#define LOGICAL_cfQ(B) -#define PLOGICAL_cfQ(B) -#define STRINGV_cfQ(B) char *B; unsigned int _(B,N); -#define STRING_cfQ(B) char *B=NULL; -#define PSTRING_cfQ(B) char *B=NULL; -#define PSTRINGV_cfQ(B) STRINGV_cfQ(B) -#define PNSTRING_cfQ(B) char *B=NULL; -#define PPSTRING_cfQ(B) - -#if defined(apolloFortran) || defined(hpuxFortran800) || defined(AbsoftUNIXFortran) -#define ROUTINE_orig (void *)* /* Else, function value has to match. */ -#else /* !apolloFortran */ -#ifdef __sgi /* Else SGI gives warning 182 contrary to its C LRM A.17.7 */ -#define ROUTINE_orig *(void**)& -#else /* !__sgi */ -#define ROUTINE_orig (void *) -#endif /* __sgi */ -#endif /* apolloFortran */ - -#define ROUTINE_1 ROUTINE_orig -#define ROUTINE_2 ROUTINE_orig -#define ROUTINE_3 ROUTINE_orig -#define ROUTINE_4 ROUTINE_orig -#define ROUTINE_5 ROUTINE_orig -#define ROUTINE_6 ROUTINE_orig -#define ROUTINE_7 ROUTINE_orig -#define ROUTINE_8 ROUTINE_orig -#define ROUTINE_9 ROUTINE_orig -#define ROUTINE_10 ROUTINE_orig - -#define TCF(NAME,TN,I,M) _SEP_(TN,M,COMMA) _(TN,_cfT)(NAME,I,_(A,I),_(B,I),_(C,I)) -#define BYTE_cfT(M,I,A,B,D) *A -#define DOUBLE_cfT(M,I,A,B,D) *A -#define FLOAT_cfT(M,I,A,B,D) *A -#define INT_cfT(M,I,A,B,D) *A -#define LOGICAL_cfT(M,I,A,B,D) F2CLOGICAL(*A) -#define LONG_cfT(M,I,A,B,D) *A -#define SHORT_cfT(M,I,A,B,D) *A -#define BYTEV_cfT(M,I,A,B,D) A -#define DOUBLEV_cfT(M,I,A,B,D) A -#define FLOATV_cfT(M,I,A,B,D) VOIDP A -#define INTV_cfT(M,I,A,B,D) A -#define LOGICALV_cfT(M,I,A,B,D) A -#define LONGV_cfT(M,I,A,B,D) A -#define SHORTV_cfT(M,I,A,B,D) A -#define BYTEVV_cfT(M,I,A,B,D) (void *)A /* We have to 
cast to void *,*/ -#define BYTEVVV_cfT(M,I,A,B,D) (void *)A /* since we don't know the */ -#define BYTEVVVV_cfT(M,I,A,B,D) (void *)A /* dimensions of the array. */ -#define BYTEVVVVV_cfT(M,I,A,B,D) (void *)A /* i.e. Unfortunately, can't */ -#define BYTEVVVVVV_cfT(M,I,A,B,D) (void *)A /* check that the type */ -#define BYTEVVVVVVV_cfT(M,I,A,B,D) (void *)A /* matches the prototype. */ -#define DOUBLEVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVVV_cfT(M,I,A,B,D) (void *)A -#define DOUBLEVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVVV_cfT(M,I,A,B,D) (void *)A -#define FLOATVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVV_cfT(M,I,A,B,D) (void *)A -#define INTVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVVV_cfT(M,I,A,B,D) (void *)A -#define INTVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LOGICALVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVVV_cfT(M,I,A,B,D) (void *)A -#define LONGVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVVV_cfT(M,I,A,B,D) (void *)A -#define SHORTVVVVVVV_cfT(M,I,A,B,D) (void *)A -#define 
PBYTE_cfT(M,I,A,B,D) A -#define PDOUBLE_cfT(M,I,A,B,D) A -#define PFLOAT_cfT(M,I,A,B,D) VOIDP A -#define PINT_cfT(M,I,A,B,D) A -#define PLOGICAL_cfT(M,I,A,B,D) ((*A=F2CLOGICAL(*A)),A) -#define PLONG_cfT(M,I,A,B,D) A -#define PSHORT_cfT(M,I,A,B,D) A -#define PVOID_cfT(M,I,A,B,D) A -#define ROUTINE_cfT(M,I,A,B,D) _(ROUTINE_,I) A -/* A == pointer to the characters - D == length of the string, or of an element in an array of strings - E == number of elements in an array of strings */ -#define TTSTR( A,B,D) \ - ((B=(char*)malloc(D+1))[D]='\0', memcpy(B,A,D), kill_trailing(B,' ')) -#define TTTTSTR( A,B,D) (!(D<4||A[0]||A[1]||A[2]||A[3]))?NULL: \ - memchr(A,'\0',D) ?A : TTSTR(A,B,D) -#define TTTTSTRV( A,B,D,E) (_(B,N)=E,B=(char*)malloc(_(B,N)*(D+1)), (void *) \ - vkill_trailing(f2cstrv(A,B,D+1, _(B,N)*(D+1)), D+1,_(B,N)*(D+1),' ')) -#ifdef vmsFortran -#define STRING_cfT(M,I,A,B,D) TTTTSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(A->dsc$a_pointer, B, \ - A->dsc$w_length , A->dsc$l_m[0]) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define PPSTRING_cfT(M,I,A,B,D) A->dsc$a_pointer -#else -#ifdef CRAYFortran -#define STRING_cfT(M,I,A,B,D) TTTTSTR( _fcdtocp(A),B,_fcdlen(A)) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(_fcdtocp(A),B,_fcdlen(A), \ - num_elem(_fcdtocp(A),_fcdlen(A),_3(M,_STRV_A,I))) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( _fcdtocp(A),B,_fcdlen(A)) -#define PPSTRING_cfT(M,I,A,B,D) _fcdtocp(A) -#else -#define STRING_cfT(M,I,A,B,D) TTTTSTR( A,B,D) -#define STRINGV_cfT(M,I,A,B,D) TTTTSTRV(A,B,D, num_elem(A,D,_3(M,_STRV_A,I))) -#define PSTRING_cfT(M,I,A,B,D) TTSTR( A,B,D) -#define PPSTRING_cfT(M,I,A,B,D) A -#endif -#endif -#define PNSTRING_cfT(M,I,A,B,D) STRING_cfT(M,I,A,B,D) -#define PSTRINGV_cfT(M,I,A,B,D) STRINGV_cfT(M,I,A,B,D) -#define CF_0_cfT(M,I,A,B,D) - -#define RCF(TN,I) _(TN,_cfSTR)(3,R,_(A,I),_(B,I),_(C,I),0,0) -#define DEFAULT_cfR(A,B,D) -#define LOGICAL_cfR(A,B,D) -#define PLOGICAL_cfR(A,B,D) 
*A=C2FLOGICAL(*A); -#define STRING_cfR(A,B,D) if (B) free(B); -#define STRINGV_cfR(A,B,D) free(B); -/* A and D as defined above for TSTRING(V) */ -#define RRRRPSTR( A,B,D) if (B) memcpy(A,B, _cfMIN(strlen(B),D)), \ - (D>strlen(B)?memset(A+strlen(B),' ', D-strlen(B)):0), free(B); -#define RRRRPSTRV(A,B,D) c2fstrv(B,A,D+1,(D+1)*_(B,N)), free(B); -#ifdef vmsFortran -#define PSTRING_cfR(A,B,D) RRRRPSTR( A->dsc$a_pointer,B,A->dsc$w_length) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(A->dsc$a_pointer,B,A->dsc$w_length) -#else -#ifdef CRAYFortran -#define PSTRING_cfR(A,B,D) RRRRPSTR( _fcdtocp(A),B,_fcdlen(A)) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(_fcdtocp(A),B,_fcdlen(A)) -#else -#define PSTRING_cfR(A,B,D) RRRRPSTR( A,B,D) -#define PSTRINGV_cfR(A,B,D) RRRRPSTRV(A,B,D) -#endif -#endif -#define PNSTRING_cfR(A,B,D) PSTRING_cfR(A,B,D) -#define PPSTRING_cfR(A,B,D) - -#define BYTE_cfFZ(UN,LN) INTEGER_BYTE FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define DOUBLE_cfFZ(UN,LN) DOUBLE_PRECISION FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define INT_cfFZ(UN,LN) int FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define LOGICAL_cfFZ(UN,LN) int FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define LONG_cfFZ(UN,LN) long FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define SHORT_cfFZ(UN,LN) short FCALLSC_QUALIFIER fcallsc(UN,LN)( -#define VOID_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)( -#ifndef __CF__KnR -/* The void is req'd by the Apollo, to make this an ANSI function declaration. - The Apollo promotes K&R float functions to double. 
*/ -#define FLOAT_cfFZ(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)(void -#ifdef vmsFortran -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(fstring *AS -#else -#ifdef CRAYFortran -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(_fcd AS -#else -#if defined(AbsoftUNIXFortran) -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(char *AS -#else -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(char *AS, unsigned D0 -#endif -#endif -#endif -#else -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfFZ(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)( -#else -#define FLOAT_cfFZ(UN,LN) FLOATFUNCTIONTYPE FCALLSC_QUALIFIER fcallsc(UN,LN)( -#endif -#if defined(vmsFortran) || defined(CRAYFortran) || defined(AbsoftUNIXFortran) -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(AS -#else -#define STRING_cfFZ(UN,LN) void FCALLSC_QUALIFIER fcallsc(UN,LN)(AS, D0 -#endif -#endif - -#define BYTE_cfF(UN,LN) BYTE_cfFZ(UN,LN) -#define DOUBLE_cfF(UN,LN) DOUBLE_cfFZ(UN,LN) -#ifndef __CF_KnR -#define FLOAT_cfF(UN,LN) float FCALLSC_QUALIFIER fcallsc(UN,LN)( -#else -#define FLOAT_cfF(UN,LN) FLOAT_cfFZ(UN,LN) -#endif -#define INT_cfF(UN,LN) INT_cfFZ(UN,LN) -#define LOGICAL_cfF(UN,LN) LOGICAL_cfFZ(UN,LN) -#define LONG_cfF(UN,LN) LONG_cfFZ(UN,LN) -#define SHORT_cfF(UN,LN) SHORT_cfFZ(UN,LN) -#define VOID_cfF(UN,LN) VOID_cfFZ(UN,LN) -#define STRING_cfF(UN,LN) STRING_cfFZ(UN,LN), - -#define INT_cfFF -#define VOID_cfFF -#ifdef vmsFortran -#define STRING_cfFF fstring *AS; -#else -#ifdef CRAYFortran -#define STRING_cfFF _fcd AS; -#else -#define STRING_cfFF char *AS; unsigned D0; -#endif -#endif - -#define INT_cfL A0= -#define STRING_cfL A0= -#define VOID_cfL - -#define INT_cfK -#define VOID_cfK -/* KSTRING copies the string into the position provided by the caller. 
*/ -#ifdef vmsFortran -#define STRING_cfK \ - memcpy(AS->dsc$a_pointer,A0,_cfMIN(AS->dsc$w_length,(A0==NULL?0:strlen(A0))));\ - AS->dsc$w_length>(A0==NULL?0:strlen(A0))? \ - memset(AS->dsc$a_pointer+(A0==NULL?0:strlen(A0)),' ', \ - AS->dsc$w_length-(A0==NULL?0:strlen(A0))):0; -#else -#ifdef CRAYFortran -#define STRING_cfK \ - memcpy(_fcdtocp(AS),A0, _cfMIN(_fcdlen(AS),(A0==NULL?0:strlen(A0))) ); \ - _fcdlen(AS)>(A0==NULL?0:strlen(A0))? \ - memset(_fcdtocp(AS)+(A0==NULL?0:strlen(A0)),' ', \ - _fcdlen(AS)-(A0==NULL?0:strlen(A0))):0; -#else -#define STRING_cfK memcpy(AS,A0, _cfMIN(D0,(A0==NULL?0:strlen(A0))) ); \ - D0>(A0==NULL?0:strlen(A0))?memset(AS+(A0==NULL?0:strlen(A0)), \ - ' ', D0-(A0==NULL?0:strlen(A0))):0; -#endif -#endif - -/* Note that K.. and I.. can't be combined since K.. has to access data before -R.., in order for functions returning strings which are also passed in as -arguments to work correctly. Note that R.. frees and hence may corrupt the -string. */ -#define BYTE_cfI return A0; -#define DOUBLE_cfI return A0; -#if ! (defined(FLOATFUNCTIONTYPE)&&defined(ASSIGNFLOAT)&&defined(RETURNFLOAT)) -#define FLOAT_cfI return A0; -#else -#define FLOAT_cfI RETURNFLOAT(A0); -#endif -#define INT_cfI return A0; -#ifdef hpuxFortran800 -/* Incredibly, functions must return true as 1, elsewhere .true.==0x01000000. */ -#define LOGICAL_cfI return ((A0)?1:0); -#else -#define LOGICAL_cfI return C2FLOGICAL(A0); -#endif -#define LONG_cfI return A0; -#define SHORT_cfI return A0; -#define STRING_cfI return ; -#define VOID_cfI return ; - -#ifdef OLD_VAXC /* Allow %CC-I-PARAMNOTUSED. 
*/ -#pragma standard -#endif - -#define FCALLSCSUB0( CN,UN,LN) FCALLSCFUN0(VOID,CN,UN,LN) -#define FCALLSCSUB1( CN,UN,LN,T1) FCALLSCFUN1(VOID,CN,UN,LN,T1) -#define FCALLSCSUB2( CN,UN,LN,T1,T2) FCALLSCFUN2(VOID,CN,UN,LN,T1,T2) -#define FCALLSCSUB3( CN,UN,LN,T1,T2,T3) FCALLSCFUN3(VOID,CN,UN,LN,T1,T2,T3) -#define FCALLSCSUB4( CN,UN,LN,T1,T2,T3,T4) \ - FCALLSCFUN4(VOID,CN,UN,LN,T1,T2,T3,T4) -#define FCALLSCSUB5( CN,UN,LN,T1,T2,T3,T4,T5) \ - FCALLSCFUN5(VOID,CN,UN,LN,T1,T2,T3,T4,T5) -#define FCALLSCSUB6( CN,UN,LN,T1,T2,T3,T4,T5,T6) \ - FCALLSCFUN6(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6) -#define FCALLSCSUB7( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - FCALLSCFUN7(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) -#define FCALLSCSUB8( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - FCALLSCFUN8(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) -#define FCALLSCSUB9( CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - FCALLSCFUN9(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) -#define FCALLSCSUB10(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - FCALLSCFUN10(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) -#define FCALLSCSUB11(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - FCALLSCFUN11(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) -#define FCALLSCSUB12(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - FCALLSCFUN12(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) -#define FCALLSCSUB13(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - FCALLSCFUN13(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) -#define FCALLSCSUB14(CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - FCALLSCFUN14(VOID,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) - -#define FCALLSCFUN1( T0,CN,UN,LN,T1) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN2( T0,CN,UN,LN,T1,T2) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,CF_0,CF_0,CF_0) -#define FCALLSCFUN3( T0,CN,UN,LN,T1,T2,T3) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,T3,CF_0,CF_0) -#define FCALLSCFUN4( T0,CN,UN,LN,T1,T2,T3,T4) \ - FCALLSCFUN5 (T0,CN,UN,LN,T1,T2,T3,T4,CF_0) -#define FCALLSCFUN5( 
T0,CN,UN,LN,T1,T2,T3,T4,T5) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,CF_0,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN6( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN7( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,CF_0,CF_0,CF_0) -#define FCALLSCFUN8( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,CF_0,CF_0) -#define FCALLSCFUN9( T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9) \ - FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,CF_0) -#define FCALLSCFUN10(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,CF_0,CF_0,CF_0,CF_0) -#define FCALLSCFUN11(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,CF_0,CF_0,CF_0) -#define FCALLSCFUN12(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,CF_0,CF_0) -#define FCALLSCFUN13(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD) \ - FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,CF_0) - -#ifndef __CF__KnR -#define FCALLSCFUN0(T0,CN,UN,LN) CFextern _(T0,_cfFZ)(UN,LN) ABSOFT_cf2(T0)) \ - {_Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN(); _Icf(0,K,T0,0,0) _(T0,_cfI)} - -#define FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern _(T0,_cfF)(UN,LN) \ - CFARGT14(NCF,DCF,ABSOFT_cf2(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)) \ - { CFARGT14S(QCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN( TCF(LN,T1,1,0) TCF(LN,T2,2,1) \ - TCF(LN,T3,3,1) TCF(LN,T4,4,1) TCF(LN,T5,5,1) TCF(LN,T6,6,1) TCF(LN,T7,7,1) \ - TCF(LN,T8,8,1) TCF(LN,T9,9,1) TCF(LN,TA,A,1) TCF(LN,TB,B,1) TCF(LN,TC,C,1) \ - TCF(LN,TD,D,1) TCF(LN,TE,E,1) ); _Icf(0,K,T0,0,0) \ - CFARGT14S(RCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfI) } -#else -#define 
FCALLSCFUN0(T0,CN,UN,LN) CFextern _(T0,_cfFZ)(UN,LN) ABSOFT_cf3(T0)) _Icf(0,FF,T0,0,0)\ - {_Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN(); _Icf(0,K,T0,0,0) _(T0,_cfI)} - -#define FCALLSCFUN14(T0,CN,UN,LN,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - CFextern _(T0,_cfF)(UN,LN) \ - CFARGT14(NNCF,DDCF,ABSOFT_cf3(T0),T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE)) _Icf(0,FF,T0,0,0) \ - CFARGT14FS(NNNCF,DDDCF,,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE); \ - { CFARGT14S(QCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) \ - _Icf(2,UU,T0,A0,0); _Icf(0,L,T0,0,0) CN( TCF(LN,T1,1,0) TCF(LN,T2,2,1) \ - TCF(LN,T3,3,1) TCF(LN,T4,4,1) TCF(LN,T5,5,1) TCF(LN,T6,6,1) TCF(LN,T7,7,1) \ - TCF(LN,T8,8,1) TCF(LN,T9,9,1) TCF(LN,TA,A,1) TCF(LN,TB,B,1) TCF(LN,TC,C,1) \ - TCF(LN,TD,D,1) TCF(LN,TE,E,1) ); _Icf(0,K,T0,0,0) \ - CFARGT14S(RCF,T1,T2,T3,T4,T5,T6,T7,T8,T9,TA,TB,TC,TD,TE) _(T0,_cfI)} -#endif - - -#endif /* __CFORTRAN_LOADED */ diff --git a/benchmarks/chunky.cpp b/benchmarks/chunky.cpp deleted file mode 100644 index bb704400..00000000 --- a/benchmarks/chunky.cpp +++ /dev/null @@ -1,156 +0,0 @@ -#include - -using namespace blitz; - -void report(const char* name, Timer& timer, int N, - long int iterations); -void unfused(int N); -void fused(int N); -void chunky(int N); - -float* _bz_restrict a; -float* _bz_restrict b; -float* _bz_restrict c; -float* _bz_restrict d; -float* _bz_restrict e; -float* _bz_restrict f; - -/* - * Code to support the "fuse" macro - */ - -int _chunk; -bool _done_chunks; -int _chunk_size = 512; - -#define fuse _chunk = 0; _done_chunks = false; \ - for (; !_done_chunks; ++_chunk) - -int main(int argc, char** argv) -{ - if (argc == 2) - _chunk_size = atoi(argv[1]); - - cout << "Using chunk size " << _chunk_size << endl; - - const int N = 100000; - - a = new float[N]; - b = new float[N]; - c = new float[N]; - d = new float[N]; - e = new float[N]; - f = new float[N]; - - for (int i=0; i < N; ++i) - { - a[i] = i; - b[i] = i; - c[i] = i; - d[i] = i; - } - - Timer timer; - long 
int iterations = 100; - - timer.start(); - for (long i=0; i < iterations; ++i) - unfused(N); - timer.stop(); - - report("Unfused", timer, N, iterations); - - timer.start(); - for (long i=0; i < iterations; ++i) - fused(N); - timer.stop(); - - report("Fused", timer, N, iterations); - - timer.start(); - for (long i=0; i < iterations; ++i) - chunky(N); - timer.stop(); - - report("Chunky", timer, N, iterations); - - return 0; -} - -void report(const char* name, Timer& timer, int N, - long int iterations) -{ - float flops = float(N) * iterations * 2; - float Mflops = flops / timer.elapsedSeconds() / 1e+6; - cout << setw(20) << name << " " << Mflops << " Mflops/s" << endl; -} - -void __sink() { } - -void unfused(int N) -{ - for (int i=0; i < N; ++i) - e[i] = a[i] * b[i] + c[i] * d[i]; - - __sink(); - - for (int i=0; i < N; ++i) - f[i] = c[i] * b[i] + a[i] * d[i]; -} - -void fused(int N) -{ - for (int i=0; i < N; ++i) - { - e[i] = a[i] * b[i] + c[i] * d[i]; - f[i] = c[i] * b[i] + a[i] * d[i]; - } -} - - -// This "chunky" routine is a simulated implementation of -// expression templates with tiling across multiple statements -// (the "chunky fusion" approach). 
This code would be -// generated by: -// -// fuse { -// E = A*B + C*D; -// F = C*B + A*D; -// } - -void chunky(int N) -{ - fuse { - - { // Code generated by E = A*B + C*D; - int lbound = _chunk * _chunk_size; - int uboundp1 = lbound + _chunk_size; - - if (uboundp1 > N) - { - _done_chunks = true; - uboundp1 = N; - } - - for (int i=lbound; i < uboundp1; ++i) - e[i] = a[i] * b[i] + c[i] * d[i]; - } - - __sink(); - - { // Code generated by F = C*B + A*D; - int lbound = _chunk * _chunk_size; - int uboundp1 = lbound + _chunk_size; - - if (uboundp1 > N) - { - _done_chunks = true; - uboundp1 = N; - } - - for (int i=lbound; i < uboundp1; ++i) - f[i] = c[i] * b[i] + a[i] * d[i]; - } - } -} - diff --git a/benchmarks/compiletime.cpp b/benchmarks/compiletime.cpp deleted file mode 100644 index e69de29b..00000000 diff --git a/benchmarks/ctime-results b/benchmarks/ctime-results deleted file mode 100644 index f79285c2..00000000 --- a/benchmarks/ctime-results +++ /dev/null @@ -1,94 +0,0 @@ -On olympus.extreme.indiana.edu (sparc-sun-solaris2.6): -egcs 1.1b - -Initial version, with -O2 -ftemplate-depth-30 -O2 -funroll-loops - -fstrict-aliasing - -ctime1 17.7 0.9 -ctime2 25.7 1.2 -ctime3 52.0 2.1 -ctime4 sleep - -With -fno-gcse: -ctime1 17.3 1.0 -ctime2 26.3 1.3 -ctime3 1:02.0 2.1 -ctime4 sleep - -With -O: -ctime1 17.3 0.8 -ctime2 24.4 1.2 -ctime3 51.5 2.1 -ctime4 sleep - -With -O -fno-inline: -ctime1 16.9 0.8 -ctime2 20.0 1.0 -ctime3 24.7 1.2 -ctime4 31.2 1.6 - -Woohoo. Okay, obviously inlining is the key. 
- -Now try new expression templates: - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -ctime1 14.1 0.9 -ctime2 22.3 1.2 -ctime3 58.8 2.2 - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -ctime1 14.1 0.9 -ctime2 21.1 1.0 -ctime3 45.4 1.9 - -With -O -funroll-loops -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -DBZ_ETPARMS_CONSTREF -ctime1 14.6 0.8 -ctime2 20.7 1.1 -ctime3 41.6 2.1 -ctime4 1:27.7 3.0 - -Things to try: --fno-inline - -Just -O (this will turn off -funroll-all-loops) --fno-expensive-optimizations --fno-unroll-all-loops --fno-strength-reduce --fno-rerun-cse-after-loop - - - - - - -On hgar1.cwru.edu (alpha), with KCC: - -With +K3 -O3 -DBZ_NEW_EXPRESSION_TEMPLATES -DBZ_NO_INLINE_ET -DBZ_ETPARMS_CONSTREF: -ctime1 13.1 0.8 -ctime2 20.9 1.0 -ctime3 27.3 1.0 -ctime4 36.2 1.1 -ctime5 48.7 1.2 - -With just +K3 -O3: -ctime1 15.8 0.9 -ctime2 25.3 1.0 -ctime3 46.2 1.2 -ctime4 79.9 1.5 - -So a speed up of about X 2 with KCC, not counting the overhead. - - -Here are the results for : -ctime1 0.9 0.2 -ctime2 2.1 0.2 -ctime3 9.4 0.3 -ctime4 33.2 0.4 -ctime5 1:13 0.6 - - -For C code: -ctime5 0.35 0.08 - -Pretty terrible. 
- diff --git a/benchmarks/ctime1.cpp b/benchmarks/ctime1.cpp deleted file mode 100644 index 368ffe1f..00000000 --- a/benchmarks/ctime1.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - diff --git a/benchmarks/ctime1v.cpp b/benchmarks/ctime1v.cpp deleted file mode 100644 index 79739a27..00000000 --- a/benchmarks/ctime1v.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -using namespace std; - -int main() -{ -} - diff --git a/benchmarks/ctime2.cpp b/benchmarks/ctime2.cpp deleted file mode 100644 index b1060a01..00000000 --- a/benchmarks/ctime2.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; -} - diff --git a/benchmarks/ctime2v.cpp b/benchmarks/ctime2v.cpp deleted file mode 100644 index 818829cb..00000000 --- a/benchmarks/ctime2v.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; -} - diff --git a/benchmarks/ctime3.cpp b/benchmarks/ctime3.cpp deleted file mode 100644 index 2fe8bbc2..00000000 --- a/benchmarks/ctime3.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; -} - diff --git a/benchmarks/ctime3v.cpp b/benchmarks/ctime3v.cpp deleted file mode 100644 index c0463ff4..00000000 --- a/benchmarks/ctime3v.cpp +++ /dev/null @@ -1,32 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float x) { return static_cast(log(x)); 
} -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; -} - diff --git a/benchmarks/ctime4.cpp b/benchmarks/ctime4.cpp deleted file mode 100644 index 4b766733..00000000 --- a/benchmarks/ctime4.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*pow2(C) + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; -} - diff --git a/benchmarks/ctime4v.cpp b/benchmarks/ctime4v.cpp deleted file mode 100644 index 9fdb8ec1..00000000 --- a/benchmarks/ctime4v.cpp +++ /dev/null @@ -1,39 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float x) { return static_cast(log(x)); } -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - - -using namespace std; - -int main() -{ -} - - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; -} - diff --git a/benchmarks/ctime5.cpp b/benchmarks/ctime5.cpp deleted file mode 100644 index 8ab97c90..00000000 --- a/benchmarks/ctime5.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include - -using namespace blitz; - -int main() -{ -} - -void foo() -{ - const int N = 10; - Array A(N), B(N), C(N), 
D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; - C = A+B+C+D+E; - E = (A+B)*(C-D); - D = A/B*C/D; - B = (A*B) + (A/B); - D = sin(A) + sin(E); -} - diff --git a/benchmarks/ctime5c.cpp b/benchmarks/ctime5c.cpp deleted file mode 100644 index f6761316..00000000 --- a/benchmarks/ctime5c.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include - -int main() -{ -} - -int N; -float* A, * B, * C, * D, * E; - -void foo() -{ - for (int i=0; i < N; ++i) - A[i] = B[i]*C[i] + D[i]*E[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]+C[i] + D[i]*cos(E[i]); - - for (int i=0; i < N; ++i) - A[i] = B[i]*sin(C[i]) + D[i]*sin(E[i]); - - for (int i=0; i < N; ++i) - B[i] = C[i] + D[i]; - - for (int i=0; i < N; ++i) - A[i] = A[i] + B[i] + C[i] + D[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]*C[i]*C[i] + D[i]*E[i]; - - for (int i=0; i < N; ++i) - A[i] = B[i]-C[i] + log(D[i])*cos(E[i]); - - for (int i=0; i < N; ++i) - A[i] = B[i]*sin(C[i]) + tan(D[i])/E[i]; - - for (int i=0; i < N; ++i) - B[i] = C[i] - D[i]; - - for (int i=0; i < N; ++i) - A[i] = A[i]+B[i]*C[i]+D[i]; - - for (int i=0; i < N; ++i) - C[i] = A[i]+B[i]+C[i]+D[i]+E[i]; - - for (int i=0; i < N; ++i) - E[i] = (A[i]+B[i])*(C[i]-D[i]); - - for (int i=0; i < N; ++i) - D[i] = A[i]/B[i]*C[i]/D[i]; - - for (int i=0; i < N; ++i) - B[i] = (A[i]*B[i]) + (A[i]/B[i]); - - for (int i=0; i < N; ++i) - D[i] = sin(A[i]) + sin(E[i]); -} - diff --git a/benchmarks/ctime5v.cpp b/benchmarks/ctime5v.cpp deleted file mode 100644 index 58720218..00000000 --- a/benchmarks/ctime5v.cpp +++ /dev/null @@ -1,42 +0,0 @@ - -#if defined(__GNUC__) && (__GNUC__ < 3) -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif -inline float cos(float x) { return static_cast(cos(x)); } -inline float sin(float x) { return static_cast(sin(x)); } -inline float tan(float x) { return static_cast(tan(x)); } -inline float log(float 
x) { return static_cast(log(x)); } -inline float exp(float x) { return static_cast(exp(x)); } -#endif -#include - -using namespace std; - -int main() -{ -} - -void foo() -{ - const int N = 10; - valarray A(N), B(N), C(N), D(N), E(N); - A = B*C + D*E; - A = B+C + D*cos(E); - A = B*sin(C) + D*sin(E); - B = C + D; - A = A+B+C+D; - A = B*C*C + D*E; - A = B-C + log(D)*cos(E); - A = B*sin(C) + tan(D)/E; - B = C - D; - A = A+B*C+D; - C = A+B+C+D+E; - E = (A+B)*(C-D); - D = A/B*C/D; - B = (A*B) + (A/B); - D = sin(A) + sin(E); -} - diff --git a/benchmarks/daxpy.cpp b/benchmarks/daxpy.cpp deleted file mode 100644 index 957d7914..00000000 --- a/benchmarks/daxpy.cpp +++ /dev/null @@ -1,346 +0,0 @@ -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define fdaxpy fdaxpy_ - #define daxpy daxpy_ - #define f90daxpy f90daxpy_ - #define fidaxpy fidaxpy_ - #define fidaxpyo fidaxpyo_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define fdaxpy fdaxpy__ - #define daxpy daxpy__ - #define f90daxpy f90daxpy__ - #define fidaxpy fidaxpy__ - #define fidaxpyo fidaxpyo__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define fdaxpy FDAXPY - #define daxpy DAXPY - #define f90daxpy F90DAXPY - #define fidaxpy FIDAXPY - #define fidaxpyo FIDAXPYO -#endif - -extern "C" { - void fdaxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void daxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void f90daxpy(const double& a, double* x, - const double* y, const int& length, const int& iters); - - void fidaxpy(const double& a, double* x, const double* y, - const int& length, const int& iters); - - void fidaxpyo(const double& a, double* x, 
const double* y, - const int& length, const int& iters); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b); -void daxpyArrayVersion(BenchmarkExt& bench, double a); -void daxpyF77Version(BenchmarkExt& bench, double a); -void daxpyBLASVersion(BenchmarkExt& bench, double a); -#ifdef FORTRAN_90 -void daxpyF90Version(BenchmarkExt& bench, double a); -#endif -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a); -#endif - -int main() -{ - int numBenchmarks = 6; -#ifndef BENCHMARK_VALARRAY - numBenchmarks--; // No valarray -#endif -#ifndef FORTRAN_90 - numBenchmarks--; // No fortran 90 -#endif - - BenchmarkExt bench("DAXPY Benchmark", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters(i) = static_cast(pow(10.0, 0.25*(i+1))); - iters(i) = 50000000L / parameters(i); - if (iters(i) < 2) - iters(i) = 2; - flops(i) = 2 * parameters(i) * 2; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - float a = .398498293819823; - - daxpyVectorVersion(bench, a, -a); - daxpyArrayVersion(bench, a); - daxpyF77Version(bench, a); - daxpyBLASVersion(bench, a); -#ifdef FORTRAN_90 - daxpyF90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - daxpyValarrayVersion(bench, a); -#endif - - bench.endBenchmarking(); - - bench.saveMatlabGraph("daxpy.m"); - - return 0; -} - - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void 
daxpyVectorVersion(BenchmarkExt& bench, double a, double b) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - - -void daxpyArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a * x; - y = y + b * x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -void daxpyF77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - fidaxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - - -void daxpyBLASVersion(BenchmarkExt& bench, double a) -{ -#ifdef USE_LIBBLAS - bench.beginImplementation("Platform BLAS"); -#else - bench.beginImplementation("Fortran BLAS"); -#endif - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - - cout << "Fortran BLAS: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - int xstride = 1, ystride = 1; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { -#ifdef USE_LIBBLAS - daxpy(N, a, x, xstride, y, ystride); - daxpy(N, b, x, xstride, y, ystride); -#else - fdaxpy(N, a, x, xstride, y, ystride); - fdaxpy(N, b, x, xstride, y, ystride); -#endif - } - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void daxpyF90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - f90daxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} -#endif - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "valarray: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - valarray x(N), y(N); - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/daxpy2.cpp b/benchmarks/daxpy2.cpp deleted file mode 100644 index 39ba7ccb..00000000 --- a/benchmarks/daxpy2.cpp +++ /dev/null @@ -1,312 +0,0 @@ -// In KAI C++ 3.2c, restrict causes 
problems for copy propagation. -// Temporary kludge is to disable use of the restrict keyword. - -#define BZ_DISABLE_RESTRICT - -#include -#include -#include -#include - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -using namespace blitz; - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES - #define fdaxpy fdaxpy_ - #define f90daxpy f90daxpy_ - #define fidaxpy fidaxpy_ - #define fidaxpyo fidaxpyo_ -#endif - -#ifdef BZ_FORTRAN_SYMBOLS_CAPS - #define fdaxpy FDAXPY - #define f90daxpy F90DAXPY - #define fidaxpy FIDAXPY - #define fidaxpyo FIDAXPYO -#endif - -extern "C" { - void fdaxpy(const int& N, const double& da, double* x, - const int& xstride, const double* y, const int& ystride); - - void f90daxpy(const double& a, double* x, - const double* y, const int& length, const int& iters); - - void fidaxpy(const double& a, double* x, const double* y, - const int& length, const int& iters); - - void fidaxpyo(const double& a, double* x, const double* y, - const int& length, const int& iters); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b); -void daxpyArrayVersion(BenchmarkExt& bench, double a); -void daxpyF77Version(BenchmarkExt& bench, double a); -void daxpyBLASVersion(BenchmarkExt& bench, double a); -void daxpyF90Version(BenchmarkExt& bench, double a); - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a); -#endif - -int main() -{ - -#ifdef BENCHMARK_VALARRAY - int numBenchmarks = 6; -#else - int numBenchmarks = 5; -#endif - - BenchmarkExt bench("DAXPY Benchmark", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = pow(10.0, (i+1)/4.0); - iters[i] = 50000000L / parameters[i]; - if (iters[i] < 2) - iters[i] = 2; - flops[i] = 2 * parameters[i] * 2; - } - - bench.setParameterVector(parameters); - 
bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - float a = .398498293819823; - - daxpyVectorVersion(bench, a, -a); - daxpyArrayVersion(bench, a); - daxpyF77Version(bench, a); - daxpyBLASVersion(bench, a); - daxpyF90Version(bench, a); - -#ifdef BENCHMARK_VALARRAY - daxpyValarrayVersion(bench, a); -#endif - - bench.endBenchmarking(); - - bench.saveMatlabGraph("daxpy2.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -template -void initializeArray(T& array, int numElements) -{ - static Random rnd; - - for (size_t i=0; i < numElements; ++i) - array[i] = rnd.random(); -} - -void daxpyVectorVersion(BenchmarkExt& bench, double a, double b) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - - -void daxpyArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N), y(N); - initializeRandomDouble(x.data(), N); - initializeRandomDouble(y.data(), N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void daxpyF77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - fidaxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - - -void daxpyBLASVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran BLAS"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran BLAS: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - int xstride = 1, ystride = 1; - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - fdaxpy(N, a, x, xstride, y, ystride); - fdaxpy(N, b, x, xstride, y, ystride); - } - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -void daxpyF90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - double* y = new double[N]; - initializeRandomDouble(x, N); - initializeRandomDouble(y, N); - - bench.start(); - f90daxpy(a, x, y, N, iters); - bench.stop(); - - delete [] x; - delete [] y; - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void daxpyValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "valarray: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - 
valarray x(N), y(N); - initializeArray(x, N); - initializeArray(y, N); - - double b = - a; - - bench.start(); - for (long i=0; i < iters; ++i) - { - y += a * x; - y += b * x; - } - bench.stop(); - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/daxpyf90-2.f90 b/benchmarks/daxpyf90-2.f90 deleted file mode 100644 index e0d2578d..00000000 --- a/benchmarks/daxpyf90-2.f90 +++ /dev/null @@ -1,15 +0,0 @@ -! Fortran 90 DAXPY using arrays -SUBROUTINE f90daxpy(a, x, y, n, iters) - IMPLICIT NONE - INTEGER, INTENT( IN ) :: n, iters - DOUBLE PRECISION, DIMENSION (n) :: x, y - DOUBLE PRECISION, INTENT( IN ) :: a - DOUBLE PRECISION :: b - - b = - a - - DO i=1,iters - y = y + a * y - y = y + b * y - END DO -END SUBROUTINE diff --git a/benchmarks/daxpyf90.f90 b/benchmarks/daxpyf90.f90 deleted file mode 100644 index 03e4344a..00000000 --- a/benchmarks/daxpyf90.f90 +++ /dev/null @@ -1,12 +0,0 @@ - SUBROUTINE f90daxpy(a, x, y, n, iters) - INTEGER n, iters - DOUBLE PRECISION, DIMENSION (n) :: x, y - DOUBLE PRECISION a, b - - b = - a - - DO i=1,iters - y = y + a * x - y = y + b * x - END DO - END SUBROUTINE diff --git a/benchmarks/dot.cpp b/benchmarks/dot.cpp deleted file mode 100644 index 78a9471f..00000000 --- a/benchmarks/dot.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include - -using namespace blitz; - -double dot(const double* a, const double* b, int n) -{ - double result = 0.; - for (int i=0; i < n; ++i) - result += a[i] * b[i]; - - return result; -} - -template -void sink(T&) -{ -} - -void sink(double,double,double,double,double,double,double,double,double,double) -{ -} - -int main() -{ - Timer timer; - const int iterations = 1000000; - - double a1[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b1[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a2[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b2[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a3[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b3[3] = { 0.3989421, 0.9854983, 
0.58439328 }; - double a4[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b4[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a5[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b5[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a6[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b6[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a7[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b7[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a8[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b8[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a9[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b9[3] = { 0.3989421, 0.9854983, 0.58439328 }; - double a10[3] = { 0.3242343, 0.1429833, 0.43988583 }; - double b10[3] = { 0.3989421, 0.9854983, 0.58439328 }; - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(a1,b1,3); - double result2 = dot(a2,b2,3); - double result3 = dot(a3,b3,3); - double result4 = dot(a4,b4,3); - double result5 = dot(a5,b5,3); - double result6 = dot(a6,b6,3); - double result7 = dot(a7,b7,3); - double result8 = dot(a8,b8,3); - double result9 = dot(a9,b9,3); - double result10 = dot(a10,b10,3); - sink(result1,result2,result3,result4,result5,result6,result7,result8, - result9,result10); - } - timer.stop(); - - double Mflops = 10.0 * 5.0 * iterations / 1e+6; - cout << "Nonspecialized algorithm: " << (Mflops/timer.elapsedSeconds()) - << endl; - - TinyVector c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, - d1, d2, d3, d4, d5, d6, d7, d8, d9, d10; - c1 = 0.3242343, 0.1429833, 0.43988583; - d1 = 0.3989421, 0.9854983, 0.58439328; - sink(c1); - sink(d1); - c2 = 0.3242343, 0.1429833, 0.43988583; - d2 = 0.3989421, 0.9854983, 0.58439328; - sink(c2); - sink(d2); - c3 = 0.3242343, 0.1429833, 0.43988583; - d3 = 0.3989421, 0.9854983, 0.58439328; - sink(c3); - sink(d3); - c4 = 0.3242343, 0.1429833, 0.43988583; - d4 = 0.3989421, 0.9854983, 0.58439328; - sink(c4); - sink(d4); - c5 = 0.3242343, 0.1429833, 0.43988583; - d5 = 
0.3989421, 0.9854983, 0.58439328; - sink(c5); - sink(d5); - c6 = 0.3242343, 0.1429833, 0.43988583; - d6 = 0.3989421, 0.9854983, 0.58439328; - sink(c6); - sink(d6); - c7 = 0.3242343, 0.1429833, 0.43988583; - d7 = 0.3989421, 0.9854983, 0.58439328; - sink(c7); - sink(d7); - c8 = 0.3242343, 0.1429833, 0.43988583; - d8 = 0.3989421, 0.9854983, 0.58439328; - sink(c8); - sink(d8); - c9 = 0.3242343, 0.1429833, 0.43988583; - d9 = 0.3989421, 0.9854983, 0.58439328; - sink(c9); - sink(d9); - c10 = 0.3242343, 0.1429833, 0.43988583; - d10 = 0.3989421, 0.9854983, 0.58439328; - sink(c10); - sink(d10); - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(c1, d1); - double result2 = dot(c2, d2); - double result3 = dot(c3, d3); - double result4 = dot(c4, d4); - double result5 = dot(c5, d5); - double result6 = dot(c6, d6); - double result7 = dot(c7, d7); - double result8 = dot(c8, d8); - double result9 = dot(c9, d9); - double result10 = dot(c10, d10); - sink(result1, result2, result3, result4, result5, result6, result7, - result8, result9, result10); - } - timer.stop(); - cout << "Metaprogram: " << (Mflops/timer.elapsedSeconds()) - << endl; - - return 0; -} - diff --git a/benchmarks/dot2.cpp b/benchmarks/dot2.cpp deleted file mode 100644 index 97ee26fc..00000000 --- a/benchmarks/dot2.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include - -using namespace blitz; - -double dot(const double* a, const double* b, int n) -{ - double result = 0.; - for (int i=0; i < n; ++i) - result += a[i] * b[i]; - - return result; -} - -template -void sink(T&) -{ -} - -void sink(double,double,double,double,double,double,double,double,double,double) -{ -} - -void init(double* x, int n) -{ - // Completely arbitrary - for (int i=0; i < n; ++i) - x[i] = 3.4982938192839824982 * i; -} - -const int nmax = 40; - -int main() -{ - Timer timer; - const int iterations1 = 5000000; - - double a1[nmax],a2[nmax],a3[nmax],a4[nmax],a5[nmax],a6[nmax],a7[nmax],a8[nmax],a9[nmax], - 
a10[nmax],b1[nmax],b2[nmax],b3[nmax],b4[nmax],b5[nmax],b6[nmax],b7[nmax],b8[nmax], - b9[nmax],b10[nmax]; - init(a1,nmax); - init(a2,nmax); - init(a3,nmax); - init(a4,nmax); - init(a5,nmax); - init(a6,nmax); - init(a7,nmax); - init(a8,nmax); - init(a9,nmax); - init(a10,nmax); - init(b1,nmax); - init(b2,nmax); - init(b3,nmax); - init(b4,nmax); - init(b5,nmax); - init(b6,nmax); - init(b7,nmax); - init(b8,nmax); - init(b9,nmax); - init(b10,nmax); - - for (int n=1; n < nmax; ++n) - { - int iterations = iterations1 / n; - - timer.start(); - for (int i=0; i < iterations; ++i) - { - double result1 = dot(a1,b1,n); - double result2 = dot(a2,b2,n); - double result3 = dot(a3,b3,n); - double result4 = dot(a4,b4,n); - double result5 = dot(a5,b5,n); - double result6 = dot(a6,b6,n); - double result7 = dot(a7,b7,n); - double result8 = dot(a8,b8,n); - double result9 = dot(a9,b9,n); - double result10 = dot(a10,b10,n); - sink(result1,result2,result3,result4,result5,result6,result7,result8, - result9,result10); - } - timer.stop(); - - double Mflops = 10.0 * (n + (n-1)) * iterations / 1e+6; - cout << n << '\t' << (Mflops/timer.elapsedSeconds()) << endl; - } - - return 0; -} - diff --git a/benchmarks/echof2-back.f b/benchmarks/echof2-back.f deleted file mode 100644 index 02c7c32e..00000000 --- a/benchmarks/echof2-back.f +++ /dev/null @@ -1,133 +0,0 @@ -! Tuned Fortran 77 version -! Optimizations: -! - blocked stencil algorithm to improve cache use -! - arrays interlaced by making one big 3-dimensional array -! - copying of arrays avoided by cycling indices into the -! 3-d array - - SUBROUTINE echo_f77Tuned(N, niters, check) - INTEGER N, niters, iter - REAL check - REAL A(N,N,4) -! 
P1 = A(N,N,1), P2 = A(N,N,2), P3 = A(N,N,3), C = A(N,N,4) - INTEGER P1, P2, P3, C - INTEGER i, j - INTEGER bi,bj,ni,nj,blockSize - - P1 = 1 - P2 = 2 - P3 = 3 - C = 4 - - CALL echo_f77Tuned_setInitialConditions(A,C,P1,P2,P3,N) - - blockSize = 128 - - DO iter=1, niters - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO j=bj,nj - DO i=bi,ni - A(i,j,P3) = (2-4*A(i,j,C))*A(i,j,P2) + A(i,j,C) - . *(A(i,j-1,P2) + A(i,j+1,P2) + A(i-1,j,P2) - . + A(i+1,j,P2)) - A(i,j,P1) - END DO - END DO - END DO - END DO - P1 = P2 - P2 = P3 - END DO - - check = A(N/2,N/2,P1) - - RETURN - END - - - - - SUBROUTINE echo_f77Tuned_setInitialConditions(A, C, P1, P2, P3, N) - INTEGER N - REAL A(N,N,4) - INTEGER C, P1, P2, P3 - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - A(i,j,C) = 0.2; - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - A(i,j,C) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - A(channel1Height,j,C) = 0.0; - A(channel2Height,j,C) = 0.0; - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO j=1,N - DO i=1,N - A(i,j,P1) = 0.0 - A(i,j,P2) = exp(-((i-cr)**2 + (j-cc)**2) * s2); - A(i,j,P3) = 0.0 - END DO - END DO - - CALL checkArray2(A,P2,N) - CALL checkArray2(A,C,N) - - RETURN - END - - - - - - SUBROUTINE checkArray2(A, P, N) - INTEGER N, P - REAL A(N,N,4) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j,P) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/echotune.cpp b/benchmarks/echotune.cpp deleted file mode 100644 index 85b0a2ac..00000000 --- a/benchmarks/echotune.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include - -using namespace blitz; -using namespace std; - -extern "C" { - void echo_f77tuned(int& N, int& niters, float& check, int& blockSize); -} - -int main() -{ - int N = 1024; - int niters = 48; - float check; - double Mflops = niters * 9; - Timer timer; - - ofstream ofs("echotune.log"); - - cout << "This program decides on the best block size for a typical 2D " - << endl << "stencil operation. Pick the block size which has the " - << endl << "maximum Mflops/s." 
<< endl << endl; - - cout << "Block size\tMflops/s" << endl; - - int blockSize; - - for (blockSize=1; blockSize < 32; ++blockSize) - { - timer.start(); - echo_f77tuned(N, niters, check, blockSize); - timer.stop(); - cout << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - ofs << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - } - for (; blockSize < 1024; blockSize += 32) - { - timer.start(); - echo_f77tuned(N, niters, check, blockSize); - timer.stop(); - cout << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - ofs << blockSize << "\t" << (Mflops/timer.elapsedSeconds()) << endl; - } - - return 0; -} - diff --git a/benchmarks/echotune.m b/benchmarks/echotune.m deleted file mode 100644 index 6646ecfa..00000000 --- a/benchmarks/echotune.m +++ /dev/null @@ -1,63 +0,0 @@ -A = [ 2 11.2062 -4 14.2105 -6 15.6069 -8 16.5138 -10 17.0481 -12 17.2869 -14 17.6543 -16 17.7778 -18 18.0225 -20 18.0225 -22 18.2896 -24 18.3362 -26 18.2741 -28 18.4537 -30 18.5647 -32 18.6368 -34 18.799 -36 18.7013 -38 18.5886 -40 18.6127 -42 18.799 -44 18.8811 -46 18.9225 -48 18.8317 -50 18.9391 -52 18.9391 -54 18.9723 -56 19.0225 -58 19.0225 -60 19.0728 -62 19.056 -64 19.115 -66 19.1066 -68 19.056 -70 19.1235 -72 19.0644 -74 19.0644 -76 18.9308 -78 18.9723 -80 19.115 -82 19.1489 -84 19.0392 -86 19.2342 -88 19.132 -90 18.8153 -92 19.056 -94 19.1574 -96 18.8235 -98 19.0476 -100 19.0813 -102 19.1066 -104 18.989 -106 19.1829 -108 19.1066 -110 19.0728 -112 19.1744 -114 19.0813 -116 19.1066 -118 19.1659 -120 19.0141 -122 19.115 -124 19.1744 -126 19.132 ]; diff --git a/benchmarks/echotunef.f b/benchmarks/echotunef.f deleted file mode 100644 index d710892b..00000000 --- a/benchmarks/echotunef.f +++ /dev/null @@ -1,128 +0,0 @@ -! Tuned Fortran 77 version -! Optimizations: -! - blocked stencil algorithm to improve cache use -! - arrays interlaced by making one big 3-dimensional array -! - copying of arrays avoided by cycling indices into the -! 
3-d array - - SUBROUTINE echo_f77Tuned(N, niters, check, blockSize) - INTEGER N, niters, iter, blockSize - REAL check - REAL A(N,N,4) -! P1 = A(N,N,1), P2 = A(N,N,2), P3 = A(N,N,3), C = A(N,N,4) - INTEGER P1, P2, P3, C - INTEGER i, j - INTEGER bi,bj,ni,nj - - P1 = 1 - P2 = 2 - P3 = 3 - C = 4 - - CALL echo_f77Tuned_setInitialConditions(A,C,P1,P2,P3,N) - - DO iter=1, niters - DO bj=2,N-1,blockSize - nj = min(bj+blockSize-1,N-1) - DO bi=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - DO j=bj,nj - DO i=bi,ni - A(i,j,P3) = (2-4*A(i,j,C))*A(i,j,P2) + A(i,j,C) - . *(A(i,j-1,P2) + A(i,j+1,P2) + A(i-1,j,P2) - . + A(i+1,j,P2)) - A(i,j,P1) - END DO - END DO - END DO - END DO - P1 = P2 - P2 = P3 - END DO - - check = A(N/2,N/2,P1) - - RETURN - END - - - - - SUBROUTINE echo_f77Tuned_setInitialConditions(A, C, P1, P2, P3, N) - INTEGER N - REAL A(N,N,4) - INTEGER C, P1, P2, P3 - INTEGER i, j, blockLeft, blockRight, blockTop, blockBottom - INTEGER channelLeft, channelRight, channel1Height, channel2Height - INTEGER cr, cc - REAL s2 - -! Default velocity in the air - - DO j=1,N - DO i=1,N - A(i,j,C) = 0.2; - END DO - END DO - -! Solid block with which the pulse collids - - blockLeft = 1 - blockRight = 2 * N / 5.0 - blockTop = N / 3.0 - blockBottom = 2 * N / 3.0 - - DO j=blockLeft,blockRight - DO i=blockTop,blockBottom - A(i,j,C) = 0.5 - END DO - END DO - -! Channel directing the pulse leftwards - - channelLeft = 4 * N / 5.0 - channelRight = N - channel1Height = 3 * N / 8.0 - channel2Height = 5 * N / 8.0 - - DO j = channelLeft,channelRight - A(channel1Height,j,C) = 0.0; - A(channel2Height,j,C) = 0.0; - END DO - -! 
Initial pressure distribution: a gaussian pulse inside the channel - - cr = N / 2 - cc = 7 * N / 8.0 - s2 = 64.0 * 9.0 / ((N/2.0) ** 2) - - DO j=1,N - DO i=1,N - A(i,j,P1) = 0.0 - A(i,j,P2) = exp(-((i-cr)**2 + (j-cc)**2) * s2); - A(i,j,P3) = 0.0 - END DO - END DO - - RETURN - END - - - - - - SUBROUTINE checkArray2(A, P, N) - INTEGER N, P - REAL A(N,N,4) - - INTEGER i,j - REAL check - check = 0.0 - DO j=1,N - DO i=1,N - check = check + (i*n+j)*A(i,j,P) - END DO - END DO - - PRINT *, 'Array check: ', check - RETURN - END diff --git a/benchmarks/fdaxpy.f b/benchmarks/fdaxpy.f deleted file mode 100644 index cfc3737a..00000000 --- a/benchmarks/fdaxpy.f +++ /dev/null @@ -1,48 +0,0 @@ - subroutine fdaxpy(n,da,dx,incx,dy,incy) -c -c constant times a vector plus a vector. -c uses unrolled loops for increments equal to one. -c jack dongarra, linpack, 3/11/78. -c modified 12/3/93, array(1) declarations changed to array(*) -c - double precision dx(*),dy(*),da - integer i,incx,incy,ix,iy,m,mp1,n -c - if(n.le.0)return - if (da .eq. 0.0d0) return - if(incx.eq.1.and.incy.eq.1)go to 20 -c -c code for unequal increments or equal increments -c not equal to 1 -c - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - dy(iy) = dy(iy) + da*dx(ix) - ix = ix + incx - iy = iy + incy - 10 continue - return -c -c code for both increments equal to 1 -c -c -c clean-up loop -c - 20 m = mod(n,4) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - dy(i) = dy(i) + da*dx(i) - 30 continue - if( n .lt. 
4 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,4 - dy(i) = dy(i) + da*dx(i) - dy(i + 1) = dy(i + 1) + da*dx(i + 1) - dy(i + 2) = dy(i + 2) + da*dx(i + 2) - dy(i + 3) = dy(i + 3) + da*dx(i + 3) - 50 continue - return - end diff --git a/benchmarks/fidaxpy.f b/benchmarks/fidaxpy.f deleted file mode 100644 index d1fec4ac..00000000 --- a/benchmarks/fidaxpy.f +++ /dev/null @@ -1,27 +0,0 @@ - subroutine fidaxpy(a, x, y, n, iters) - - integer i - double precision x(n), y(n), a - double precision b - b = - a - - do 2000 j = 1, iters - do 1000 i = 1, n - y(i) = y(i) + a * x(i) - 1000 continue - - do 3000 i = 1, n - y(i) = y(i) + b * x(i) - 3000 continue - 2000 continue - - return - end - - subroutine fidaxpyo(a, x, y, n, iters) - - double precision x(n), y(n), a - - return - end - diff --git a/benchmarks/floop1.cpp b/benchmarks/floop1.cpp deleted file mode 100644 index 4dd6139b..00000000 --- a/benchmarks/floop1.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop1 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop1_f77 floop1_f77_ - #define floop1_f77overhead floop1_f77overhead_ - #define floop1_f90 floop1_f90_ - #define floop1_f90overhead floop1_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop1_f77 floop1_f77__ - #define floop1_f77overhead floop1_f77overhead__ - #define floop1_f90 floop1_f90__ - #define floop1_f90overhead floop1_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop1_f77 FLOOP1_F77 - #define floop1_f77overhead FLOOP1_F77OVERHEAD - #define floop1_f90 FLOOP1_F90 - #define floop1_f90overhead FLOOP1_F90OVERHEAD -#endif - -extern "C" { - void floop1_f77(const int& N, float* x, float* y); - 
void floop1_f77overhead(const int& N, float* x, float* y); - void floop1_f90(const int& N, float* x, float* y); - void floop1_f90overhead(const int& N, float* x, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop1: $x = sqrt($y)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop1.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < 
numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - 
{ - x = sqrt(y(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - 
initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop1_f77(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop1_f77overhead(N, x, y); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop1_f90(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop1_f90overhead(N, x, y); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop10.cpp b/benchmarks/floop10.cpp deleted file mode 100644 index 495dabb0..00000000 --- a/benchmarks/floop10.cpp +++ /dev/null @@ -1,483 +0,0 @@ - -// floop10 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include 
-#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop10_f77 floop10_f77_ - #define floop10_f77overhead floop10_f77overhead_ - #define floop10_f90 floop10_f90_ - #define floop10_f90overhead floop10_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop10_f77 floop10_f77__ - #define floop10_f77overhead floop10_f77overhead__ - #define floop10_f90 floop10_f90__ - #define floop10_f90overhead floop10_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop10_f77 FLOOP10_F77 - #define floop10_f77overhead FLOOP10_F77OVERHEAD - #define floop10_f90 FLOOP10_F90 - #define floop10_f90overhead FLOOP10_F90OVERHEAD -#endif - -extern "C" { - void floop10_f77(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f77overhead(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f90(const int& N, float* x, float* a, float* b, float* c, const float& u); - void floop10_f90overhead(const int& N, float* x, float* a, float* b, float* c, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop10: $x = u+$a+$b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop10.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), 
N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a(tensor::i)+b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop10_f77(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop10_f77overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; 
iter < iters; ++iter) - floop10_f90(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop10_f90overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop10f.f b/benchmarks/floop10f.f deleted file mode 100644 index 0943d67f..00000000 --- a/benchmarks/floop10f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop10_F77(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = u+a(i)+b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE floop10_F77Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/floop10f90.f90 b/benchmarks/floop10f90.f90 deleted file mode 100644 index bcccb4c5..00000000 --- a/benchmarks/floop10f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop10_F90(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - x = u+a+b+c - RETURN - END - - - SUBROUTINE floop10_F90Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/floop11.cpp b/benchmarks/floop11.cpp deleted file mode 100644 index 04ceca19..00000000 --- a/benchmarks/floop11.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop11 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop11_f77 floop11_f77_ - #define floop11_f77overhead floop11_f77overhead_ - #define floop11_f90 floop11_f90_ - #define floop11_f90overhead floop11_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop11_f77 
floop11_f77__ - #define floop11_f77overhead floop11_f77overhead__ - #define floop11_f90 floop11_f90__ - #define floop11_f90overhead floop11_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop11_f77 FLOOP11_F77 - #define floop11_f77overhead FLOOP11_F77OVERHEAD - #define floop11_f90 FLOOP11_F90 - #define floop11_f90overhead FLOOP11_F90OVERHEAD -#endif - -extern "C" { - void floop11_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop11_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop11: $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - 
bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop11.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - 
initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - 
initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - 
for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop11_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop11_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop11_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop11_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); 
- delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop11f.f b/benchmarks/floop11f.f deleted file mode 100644 index b1e89d5a..00000000 --- a/benchmarks/floop11f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop11_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop11_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop11f90.f90 b/benchmarks/floop11f90.f90 deleted file mode 100644 index 342effdd..00000000 --- a/benchmarks/floop11f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop11_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = a+b+c+d - RETURN - END - - - SUBROUTINE floop11_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop12.cpp b/benchmarks/floop12.cpp deleted file mode 100644 index 4d0fbfb6..00000000 --- a/benchmarks/floop12.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// floop12 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop12_f77 floop12_f77_ - #define floop12_f77overhead floop12_f77overhead_ - #define floop12_f90 floop12_f90_ - #define floop12_f90overhead floop12_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop12_f77 floop12_f77__ - #define floop12_f77overhead floop12_f77overhead__ - #define floop12_f90 floop12_f90__ - #define floop12_f90overhead floop12_f90overhead__ -#elif 
defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop12_f77 FLOOP12_F77 - #define floop12_f77overhead FLOOP12_F77OVERHEAD - #define floop12_f90 FLOOP12_F90 - #define floop12_f90overhead FLOOP12_F90OVERHEAD -#endif - -extern "C" { - void floop12_f77(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f90(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop12_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop12: $y = u+$a; $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - 
bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop12.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a(tensor::i); x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array 
y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop12_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop12_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop12_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop12_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop12f.f b/benchmarks/floop12f.f deleted file mode 100644 index f7b8698d..00000000 --- a/benchmarks/floop12f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop12_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - y(i) = u+a(i); x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop12_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/floop12f90.f90 b/benchmarks/floop12f90.f90 deleted file mode 100644 index 6ba41087..00000000 --- a/benchmarks/floop12f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop12_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - y = u+a; x = a+b+c+d - RETURN - END - - - SUBROUTINE floop12_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/floop13.cpp b/benchmarks/floop13.cpp deleted file mode 100644 index 
368fc2ba..00000000 --- a/benchmarks/floop13.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// floop13 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop13_f77 floop13_f77_ - #define floop13_f77overhead floop13_f77overhead_ - #define floop13_f90 floop13_f90_ - #define floop13_f90overhead floop13_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop13_f77 floop13_f77__ - #define floop13_f77overhead floop13_f77overhead__ - #define floop13_f90 floop13_f90__ - #define floop13_f90overhead floop13_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop13_f77 FLOOP13_F77 - #define floop13_f77overhead FLOOP13_F77OVERHEAD - #define floop13_f90 FLOOP13_F90 - #define floop13_f90overhead FLOOP13_F90OVERHEAD -#endif - -extern "C" { - void floop13_f77(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f90(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - void floop13_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, float* d, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& 
bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop13: $x = $a+$b+$c+$d; $y = u+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop13.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - 
initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i); y = u+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array 
y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new 
float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop13_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop13_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop13_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop13_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop13f.f b/benchmarks/floop13f.f deleted file mode 100644 index 8f261d16..00000000 --- a/benchmarks/floop13f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop13_F77(N, y, x, a, b, c, 
d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); y(i) = u+d(i); - END DO - RETURN - END - - - SUBROUTINE floop13_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/floop13f90.f90 b/benchmarks/floop13f90.f90 deleted file mode 100644 index 0a690c8b..00000000 --- a/benchmarks/floop13f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop13_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - x = a+b+c+d; y = u+d - RETURN - END - - - SUBROUTINE floop13_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/floop14.cpp b/benchmarks/floop14.cpp deleted file mode 100644 index b7b81b80..00000000 --- a/benchmarks/floop14.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// floop14 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop14_f77 floop14_f77_ - #define floop14_f77overhead floop14_f77overhead_ - #define floop14_f90 floop14_f90_ - #define floop14_f90overhead floop14_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop14_f77 floop14_f77__ - #define floop14_f77overhead floop14_f77overhead__ - #define floop14_f90 floop14_f90__ - #define floop14_f90overhead floop14_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop14_f77 FLOOP14_F77 - #define floop14_f77overhead FLOOP14_F77OVERHEAD - #define floop14_f90 FLOOP14_F90 - #define floop14_f90overhead FLOOP14_F90OVERHEAD -#endif - -extern "C" { - void floop14_f77(const int& N, float* y, 
float* x, float* a, float* b); - void floop14_f77overhead(const int& N, float* y, float* x, float* a, float* b); - void floop14_f90(const int& N, float* y, float* x, float* a, float* b); - void floop14_f90overhead(const int& N, float* y, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop14: $x = $a+$b; $y = $a-$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop14.m"); - return 0; -} - -template -void 
initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i); y = a(tensor::i)-b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - 
initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop14_f77(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - 
for (int iter=0; iter < iters; ++iter) - floop14_f77overhead(N, y, x, a, b); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop14_f90(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop14_f90overhead(N, y, x, a, b); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop14f.f b/benchmarks/floop14f.f deleted file mode 100644 index f68a1048..00000000 --- a/benchmarks/floop14f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop14_F77(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); y(i) = a(i)-b(i); - END DO - RETURN - END - - - SUBROUTINE floop14_F77Overhead(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop14f90.f90 b/benchmarks/floop14f90.f90 deleted file mode 100644 index 88878168..00000000 --- a/benchmarks/floop14f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop14_F90(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - x = a+b; y = a-b - RETURN - END - - - SUBROUTINE floop14_F90Overhead(N, y, x, a, b) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop15.cpp 
b/benchmarks/floop15.cpp deleted file mode 100644 index fa1d1307..00000000 --- a/benchmarks/floop15.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// floop15 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop15_f77 floop15_f77_ - #define floop15_f77overhead floop15_f77overhead_ - #define floop15_f90 floop15_f90_ - #define floop15_f90overhead floop15_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop15_f77 floop15_f77__ - #define floop15_f77overhead floop15_f77overhead__ - #define floop15_f90 floop15_f90__ - #define floop15_f90overhead floop15_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop15_f77 FLOOP15_F77 - #define floop15_f77overhead FLOOP15_F77OVERHEAD - #define floop15_f90 FLOOP15_F90 - #define floop15_f90overhead FLOOP15_F90OVERHEAD -#endif - -extern "C" { - void floop15_f77(const int& N, float* x, float* a, float* b, float* c); - void floop15_f77overhead(const int& N, float* x, float* a, float* b, float* c); - void floop15_f90(const int& N, float* x, float* a, float* b, float* c); - void floop15_f90overhead(const int& N, float* x, float* a, float* b, float* c); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // 
no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop15: $x = $c + $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop15.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - 
Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c(tensor::i) + a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N 
= " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop15_f77(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop15_f77overhead(N, x, a, b, c); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop15_f90(N, x, a, b, c); - 
bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop15_f90overhead(N, x, a, b, c); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop15f.f b/benchmarks/floop15f.f deleted file mode 100644 index e01b6c10..00000000 --- a/benchmarks/floop15f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop15_F77(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - DO i=1,N - x(i) = c(i) + a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE floop15_F77Overhead(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - RETURN - END diff --git a/benchmarks/floop15f90.f90 b/benchmarks/floop15f90.f90 deleted file mode 100644 index 91b3e201..00000000 --- a/benchmarks/floop15f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop15_F90(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - x = c + a*b - RETURN - END - - - SUBROUTINE floop15_F90Overhead(N, x, a, b, c) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N) - - RETURN - END diff --git a/benchmarks/floop16.cpp b/benchmarks/floop16.cpp deleted file mode 100644 index d86927b0..00000000 --- a/benchmarks/floop16.cpp +++ /dev/null @@ -1,505 +0,0 @@ - -// floop16 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop16_f77 floop16_f77_ - #define floop16_f77overhead floop16_f77overhead_ - #define floop16_f90 floop16_f90_ - #define floop16_f90overhead floop16_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop16_f77 floop16_f77__ - #define floop16_f77overhead floop16_f77overhead__ - #define 
floop16_f90 floop16_f90__ - #define floop16_f90overhead floop16_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop16_f77 FLOOP16_F77 - #define floop16_f77overhead FLOOP16_F77OVERHEAD - #define floop16_f90 FLOOP16_F90 - #define floop16_f90overhead FLOOP16_F90OVERHEAD -#endif - -extern "C" { - void floop16_f77(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f77overhead(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f90(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - void floop16_f90overhead(const int& N, float* y, float* x, float* a, float* b, float* c, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop16: $x = $a+$b+$c; $y = $x+$c+u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - 
bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop16.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - 
- cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i); y = x(tensor::i)+c(tensor::i)+u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+5); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - 
initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop16_f77(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop16_f77overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new 
float[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop16_f90(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop16_f90overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop16f.f b/benchmarks/floop16f.f deleted file mode 100644 index de0c0935..00000000 --- a/benchmarks/floop16f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop16_F77(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i); y(i) = x(i)+c(i)+u; - END DO - RETURN - END - - - SUBROUTINE floop16_F77Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/floop16f90.f90 b/benchmarks/floop16f90.f90 deleted file mode 100644 index 5956a269..00000000 --- a/benchmarks/floop16f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop16_F90(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - x = a+b+c; y = x+c+u - RETURN - END - - - SUBROUTINE floop16_F90Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/floop17.cpp b/benchmarks/floop17.cpp deleted file mode 100644 index d70b156d..00000000 --- a/benchmarks/floop17.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop17 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop17_f77 floop17_f77_ - #define floop17_f77overhead floop17_f77overhead_ - #define 
floop17_f90 floop17_f90_ - #define floop17_f90overhead floop17_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop17_f77 floop17_f77__ - #define floop17_f77overhead floop17_f77overhead__ - #define floop17_f90 floop17_f90__ - #define floop17_f90overhead floop17_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop17_f77 FLOOP17_F77 - #define floop17_f77overhead FLOOP17_F77OVERHEAD - #define floop17_f90 FLOOP17_F90 - #define floop17_f90overhead FLOOP17_F90OVERHEAD -#endif - -extern "C" { - void floop17_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop17_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop17: $x = ($a+$b)*($c+$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << 
iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop17.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); 
- - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a(tensor::i)+b(tensor::i))*(c(tensor::i)+d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - 
Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - 
initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop17_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop17_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop17_f90(N, x, a, b, c, 
d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop17_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop17f.f b/benchmarks/floop17f.f deleted file mode 100644 index 6dbd154b..00000000 --- a/benchmarks/floop17f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop17_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = (a(i)+b(i))*(c(i)+d(i)); - END DO - RETURN - END - - - SUBROUTINE floop17_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop17f90.f90 b/benchmarks/floop17f90.f90 deleted file mode 100644 index 2a676fa4..00000000 --- a/benchmarks/floop17f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop17_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = (a+b)*(c+d) - RETURN - END - - - SUBROUTINE floop17_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop18.cpp b/benchmarks/floop18.cpp deleted file mode 100644 index 9e075c9d..00000000 --- a/benchmarks/floop18.cpp +++ /dev/null @@ -1,462 +0,0 @@ - -// floop18 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop18_f77 floop18_f77_ - #define floop18_f77overhead floop18_f77overhead_ - #define floop18_f90 floop18_f90_ - #define floop18_f90overhead floop18_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop18_f77 floop18_f77__ - 
#define floop18_f77overhead floop18_f77overhead__ - #define floop18_f90 floop18_f90__ - #define floop18_f90overhead floop18_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop18_f77 FLOOP18_F77 - #define floop18_f77overhead FLOOP18_F77OVERHEAD - #define floop18_f90 FLOOP18_F90 - #define floop18_f90overhead FLOOP18_F90OVERHEAD -#endif - -extern "C" { - void floop18_f77(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f77overhead(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f90(const int& N, float* x, float* a, float* b, const float& u, const float& v); - void floop18_f90overhead(const int& N, float* x, float* a, float* b, const float& u, const float& v); - -} - -void VectorVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v); -void F77Version(BenchmarkExt& bench, float u, float v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop18: $x = (u+$a)*(v+$b)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = 
where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop18.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a(tensor::i))*(v+b(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long 
i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout 
<< bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop18_f77(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop18_f77overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop18_f90(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop18_f90overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop18f.f b/benchmarks/floop18f.f deleted file mode 100644 index 7d24ebf0..00000000 --- a/benchmarks/floop18f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop18_F77(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = (u+a(i))*(v+b(i)); - END DO - RETURN - END - - - SUBROUTINE floop18_F77Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/floop18f90.f90 b/benchmarks/floop18f90.f90 deleted file mode 100644 index 
1e758c40..00000000 --- a/benchmarks/floop18f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop18_F90(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - x = (u+a)*(v+b) - RETURN - END - - - SUBROUTINE floop18_F90Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*4 x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/floop19.cpp b/benchmarks/floop19.cpp deleted file mode 100644 index 8a2461ec..00000000 --- a/benchmarks/floop19.cpp +++ /dev/null @@ -1,484 +0,0 @@ - -// floop19 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop19_f77 floop19_f77_ - #define floop19_f77overhead floop19_f77overhead_ - #define floop19_f90 floop19_f90_ - #define floop19_f90overhead floop19_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop19_f77 floop19_f77__ - #define floop19_f77overhead floop19_f77overhead__ - #define floop19_f90 floop19_f90__ - #define floop19_f90overhead floop19_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop19_f77 FLOOP19_F77 - #define floop19_f77overhead FLOOP19_F77OVERHEAD - #define floop19_f90 FLOOP19_F90 - #define floop19_f90overhead FLOOP19_F90OVERHEAD -#endif - -extern "C" { - void floop19_f77(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f77overhead(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f90(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - void floop19_f90overhead(const int& N, float* y, float* x, float* a, float* b, const float& u, const float& v); - -} - -void 
VectorVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion(BenchmarkExt& bench, float u, float v); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v); -void F77Version(BenchmarkExt& bench, float u, float v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop19: $x = u*$a; $y = v*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop19.m"); - return 0; -} - -template -void 
initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v) -{ - 
bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a(tensor::i); y = v*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << 
endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int 
iter=0; iter < iters; ++iter) - floop19_f77(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop19_f77overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* y = new float[N]; - initializeRandomDouble(y, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop19_f90(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop19_f90overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop19f.f b/benchmarks/floop19f.f deleted file mode 100644 index 04207d2a..00000000 --- a/benchmarks/floop19f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop19_F77(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = u*a(i); y(i) = v*b(i); - END DO - RETURN - END - - - SUBROUTINE floop19_F77Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/floop19f90.f90 b/benchmarks/floop19f90.f90 deleted file mode 100644 index c7d26d3d..00000000 --- a/benchmarks/floop19f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop19_F90(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - x = u*a; y = 
v*b - RETURN - END - - - SUBROUTINE floop19_F90Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*4 y(N), x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/floop1f.f b/benchmarks/floop1f.f deleted file mode 100644 index cece4298..00000000 --- a/benchmarks/floop1f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop1_F77(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - DO i=1,N - x(i) = sqrt(y(i)); - END DO - RETURN - END - - - SUBROUTINE floop1_F77Overhead(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - RETURN - END diff --git a/benchmarks/floop1f90.f90 b/benchmarks/floop1f90.f90 deleted file mode 100644 index 8a37efbc..00000000 --- a/benchmarks/floop1f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop1_F90(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - x = sqrt(y) - RETURN - END - - - SUBROUTINE floop1_F90Overhead(N, x, y) - INTEGER i, N - REAL*4 x(N), y(N) - - RETURN - END diff --git a/benchmarks/floop2.cpp b/benchmarks/floop2.cpp deleted file mode 100644 index 36afc01d..00000000 --- a/benchmarks/floop2.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop2 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop2_f77 floop2_f77_ - #define floop2_f77overhead floop2_f77overhead_ - #define floop2_f90 floop2_f90_ - #define floop2_f90overhead floop2_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop2_f77 floop2_f77__ - #define floop2_f77overhead floop2_f77overhead__ - #define floop2_f90 floop2_f90__ - #define floop2_f90overhead floop2_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop2_f77 FLOOP2_F77 - #define floop2_f77overhead FLOOP2_F77OVERHEAD - #define floop2_f90 
FLOOP2_F90 - #define floop2_f90overhead FLOOP2_F90OVERHEAD -#endif - -extern "C" { - void floop2_f77(const int& N, float* x, float* y, const float& u); - void floop2_f77overhead(const int& N, float* x, float* y, const float& u); - void floop2_f90(const int& N, float* x, float* y, const float& u); - void floop2_f90overhead(const int& N, float* x, float* y, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop2: $x = $y/u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, 
u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop2.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y(tensor::i)/u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - 
bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop2_f77(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop2_f77overhead(N, x, y, u); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop2_f90(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - 
floop2_f90overhead(N, x, y, u); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop21.cpp b/benchmarks/floop21.cpp deleted file mode 100644 index d91b5bba..00000000 --- a/benchmarks/floop21.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop21 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop21_f77 floop21_f77_ - #define floop21_f77overhead floop21_f77overhead_ - #define floop21_f90 floop21_f90_ - #define floop21_f90overhead floop21_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop21_f77 floop21_f77__ - #define floop21_f77overhead floop21_f77overhead__ - #define floop21_f90 floop21_f90__ - #define floop21_f90overhead floop21_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop21_f77 FLOOP21_F77 - #define floop21_f77overhead FLOOP21_F77OVERHEAD - #define floop21_f90 FLOOP21_F90 - #define floop21_f90overhead FLOOP21_F90OVERHEAD -#endif - -extern "C" { - void floop21_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop21_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void 
F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop21: $x = $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop21.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array 
c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop21_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop21_f77overhead(N, x, 
a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop21_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop21_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop21f.f b/benchmarks/floop21f.f deleted file mode 100644 index 00c1164d..00000000 --- a/benchmarks/floop21f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop21_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE floop21_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop21f90.f90 b/benchmarks/floop21f90.f90 deleted file mode 100644 index 78a37e70..00000000 --- a/benchmarks/floop21f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop21_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = a*b + c*d - RETURN - END - - - SUBROUTINE floop21_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - 
RETURN - END diff --git a/benchmarks/floop22.cpp b/benchmarks/floop22.cpp deleted file mode 100644 index e0b94b51..00000000 --- a/benchmarks/floop22.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// floop22 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop22_f77 floop22_f77_ - #define floop22_f77overhead floop22_f77overhead_ - #define floop22_f90 floop22_f90_ - #define floop22_f90overhead floop22_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop22_f77 floop22_f77__ - #define floop22_f77overhead floop22_f77overhead__ - #define floop22_f90 floop22_f90__ - #define floop22_f90overhead floop22_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop22_f77 FLOOP22_F77 - #define floop22_f77overhead FLOOP22_F77OVERHEAD - #define floop22_f90 FLOOP22_F90 - #define floop22_f90overhead FLOOP22_F90OVERHEAD -#endif - -extern "C" { - void floop22_f77(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f90(const int& N, float* x, float* a, float* b, float* c, float* d); - void floop22_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop22: $x = $x + $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop22.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); 
- Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i) + a(tensor::i)*b(tensor::i) + 
c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop22_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop22_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - 
} - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop22_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop22_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop22f.f b/benchmarks/floop22f.f deleted file mode 100644 index c1548f4c..00000000 --- a/benchmarks/floop22f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop22_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = x(i) + a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE floop22_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/floop22f90.f90 b/benchmarks/floop22f90.f90 deleted file mode 100644 index cd47a44b..00000000 --- a/benchmarks/floop22f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop22_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - x = x + a*b + c*d - RETURN - END - - - SUBROUTINE floop22_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/floop23.cpp b/benchmarks/floop23.cpp deleted file mode 100644 index 
7aab79df..00000000 --- a/benchmarks/floop23.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// floop23 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop23_f77 floop23_f77_ - #define floop23_f77overhead floop23_f77overhead_ - #define floop23_f90 floop23_f90_ - #define floop23_f90overhead floop23_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop23_f77 floop23_f77__ - #define floop23_f77overhead floop23_f77overhead__ - #define floop23_f90 floop23_f90__ - #define floop23_f90overhead floop23_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop23_f77 FLOOP23_F77 - #define floop23_f77overhead FLOOP23_F77OVERHEAD - #define floop23_f90 FLOOP23_F90 - #define floop23_f90overhead FLOOP23_F90OVERHEAD -#endif - -extern "C" { - void floop23_f77(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f90(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop23_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const 
bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop23: $x = $a*$b + $c*$d; $y = $b+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop23.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - 
initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i); y = b(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - 
initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; 
- initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop23_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop23_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop23_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop23_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop23f.f b/benchmarks/floop23f.f deleted file mode 100644 index 3990cba0..00000000 --- a/benchmarks/floop23f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop23_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); y(i) = b(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE floop23_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - 
RETURN - END diff --git a/benchmarks/floop23f90.f90 b/benchmarks/floop23f90.f90 deleted file mode 100644 index af620d32..00000000 --- a/benchmarks/floop23f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop23_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*b + c*d; y = b+d - RETURN - END - - - SUBROUTINE floop23_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/floop24.cpp b/benchmarks/floop24.cpp deleted file mode 100644 index da16973c..00000000 --- a/benchmarks/floop24.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// floop24 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop24_f77 floop24_f77_ - #define floop24_f77overhead floop24_f77overhead_ - #define floop24_f90 floop24_f90_ - #define floop24_f90overhead floop24_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop24_f77 floop24_f77__ - #define floop24_f77overhead floop24_f77overhead__ - #define floop24_f90 floop24_f90__ - #define floop24_f90overhead floop24_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop24_f77 FLOOP24_F77 - #define floop24_f77overhead FLOOP24_F77OVERHEAD - #define floop24_f90 FLOOP24_F90 - #define floop24_f90overhead FLOOP24_F90OVERHEAD -#endif - -extern "C" { - void floop24_f77(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f90(const int& N, float* x, float* a, float* b, float* c, float* d, float* y); - void floop24_f90overhead(const int& N, 
float* x, float* a, float* b, float* c, float* d, float* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop24: $x = $a*$c - $b*$c; $y = $a*$d + $b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop24.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void 
initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - 
bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*c(tensor::i) - b(tensor::i)*c(tensor::i); y = a(tensor::i)*d(tensor::i) + b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - 
sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; 
i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop24_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop24_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* d = new float[N]; - initializeRandomDouble(d, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop24_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop24_f90overhead(N, x, a, b, c, d, y); - - 
bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop24f.f b/benchmarks/floop24f.f deleted file mode 100644 index e0985ba3..00000000 --- a/benchmarks/floop24f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop24_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*c(i) - b(i)*c(i); y(i) = a(i)*d(i) + b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE floop24_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/floop24f90.f90 b/benchmarks/floop24f90.f90 deleted file mode 100644 index 84e3451a..00000000 --- a/benchmarks/floop24f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop24_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*c - b*c; y = a*d + b+c - RETURN - END - - - SUBROUTINE floop24_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/floop25.cpp b/benchmarks/floop25.cpp deleted file mode 100644 index 84011208..00000000 --- a/benchmarks/floop25.cpp +++ /dev/null @@ -1,507 +0,0 @@ - -// floop25 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop25_f77 floop25_f77_ - #define floop25_f77overhead floop25_f77overhead_ - #define floop25_f90 floop25_f90_ - #define floop25_f90overhead floop25_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop25_f77 floop25_f77__ - #define floop25_f77overhead 
floop25_f77overhead__ - #define floop25_f90 floop25_f90__ - #define floop25_f90overhead floop25_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop25_f77 FLOOP25_F77 - #define floop25_f77overhead FLOOP25_F77OVERHEAD - #define floop25_f90 FLOOP25_F90 - #define floop25_f90overhead FLOOP25_F90OVERHEAD -#endif - -extern "C" { - void floop25_f77(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f77overhead(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f90(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - void floop25_f90overhead(const int& N, float* x, float* a, float* b, float* c, float* y, const float& u, const float& v, const float& w); - -} - -void VectorVersion(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v, float w); -void ArrayVersion_index(BenchmarkExt& bench, float u, float v, float w); -void doTinyVectorVersion(BenchmarkExt& bench, float u, float v, float w); -void F77Version(BenchmarkExt& bench, float u, float v, float w); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v, float w); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v, float w); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop25: $x = u*$b; $y = v*$b + w*$a + u*$c", numBenchmarks); - - bench.setNumParameters(numSizes); 
- - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - float v = 0.39123982498157938742; - float w = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w); - ArrayVersion_unaligned(bench, u, v, w); - ArrayVersion_misaligned(bench, u, v, w); - ArrayVersion_index(bench, u, v, w); - //doTinyVectorVersion(bench, u, v, w); - F77Version(bench, u, v, w); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v, w); -#endif - - if(runvector) - VectorVersion(bench, u, v, w); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop25.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - 
- - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b(tensor::i); y = v*b(tensor::i) + w*a(tensor::i) + u*c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } 
- - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+5); - Array y(yfill(Range(4,N+4-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + 
w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop25_f77(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop25_f77overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - 
bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u, float v, float w) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - float* c = new float[N]; - initializeRandomDouble(c, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop25_f90(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop25_f90overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop25f.f b/benchmarks/floop25f.f deleted file mode 100644 index 9f007c6d..00000000 --- a/benchmarks/floop25f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop25_F77(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - - DO i=1,N - x(i) = u*b(i); y(i) = v*b(i) + w*a(i) + u*c(i); - END DO - RETURN - END - - - SUBROUTINE floop25_F77Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - RETURN - END diff --git a/benchmarks/floop25f90.f90 b/benchmarks/floop25f90.f90 deleted file mode 100644 index 33a45013..00000000 --- a/benchmarks/floop25f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop25_F90(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), c(N), y(N), u, v, w - - x = u*b; y = v*b + w*a + u*c - RETURN - END - - - SUBROUTINE floop25_F90Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*4 x(N), a(N), b(N), 
c(N), y(N), u, v, w - - RETURN - END diff --git a/benchmarks/floop2f.f b/benchmarks/floop2f.f deleted file mode 100644 index e993794d..00000000 --- a/benchmarks/floop2f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop2_F77(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - DO i=1,N - x(i) = y(i)/u; - END DO - RETURN - END - - - SUBROUTINE floop2_F77Overhead(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - RETURN - END diff --git a/benchmarks/floop2f90.f90 b/benchmarks/floop2f90.f90 deleted file mode 100644 index 4bccc903..00000000 --- a/benchmarks/floop2f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop2_F90(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - x = y/u - RETURN - END - - - SUBROUTINE floop2_F90Overhead(N, x, y, u) - INTEGER i, N - REAL*4 x(N), y(N), u - - RETURN - END diff --git a/benchmarks/floop3.cpp b/benchmarks/floop3.cpp deleted file mode 100644 index 95ec16c0..00000000 --- a/benchmarks/floop3.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop3 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop3_f77 floop3_f77_ - #define floop3_f77overhead floop3_f77overhead_ - #define floop3_f90 floop3_f90_ - #define floop3_f90overhead floop3_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop3_f77 floop3_f77__ - #define floop3_f77overhead floop3_f77overhead__ - #define floop3_f90 floop3_f90__ - #define floop3_f90overhead floop3_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop3_f77 FLOOP3_F77 - #define floop3_f77overhead FLOOP3_F77OVERHEAD - #define floop3_f90 FLOOP3_F90 - #define floop3_f90overhead FLOOP3_F90OVERHEAD -#endif - -extern "C" { - void 
floop3_f77(const int& N, float* x, float* y, const float& a); - void floop3_f77overhead(const int& N, float* x, float* y, const float& a); - void floop3_f90(const int& N, float* x, float* y, const float& a); - void floop3_f90overhead(const int& N, float* x, float* y, const float& a); - -} - -void VectorVersion(BenchmarkExt& bench, float a); -void ArrayVersion(BenchmarkExt& bench, float a); -void ArrayVersion_unaligned(BenchmarkExt& bench, float a); -void ArrayVersion_misaligned(BenchmarkExt& bench, float a); -void ArrayVersion_index(BenchmarkExt& bench, float a); -void doTinyVectorVersion(BenchmarkExt& bench, float a); -void F77Version(BenchmarkExt& bench, float a); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float a); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float a); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop3: $y = $y + a*$x", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float a = 0.39123982498157938742; - - - ArrayVersion(bench, a); - ArrayVersion_unaligned(bench, a); - ArrayVersion_misaligned(bench, a); - ArrayVersion_index(bench, a); - //doTinyVectorVersion(bench, a); - F77Version(bench, a); -#ifdef FORTRAN_90 - F90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, a); -#endif - - 
if(runvector) - VectorVersion(bench, a); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop3.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y(tensor::i) + a*x(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} 
- -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop3_f77(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop3_f77overhead(N, x, y, a); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* y = new float[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop3_f90(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop3_f90overhead(N, x, y, 
a); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop36.cpp b/benchmarks/floop36.cpp deleted file mode 100644 index 63e0b584..00000000 --- a/benchmarks/floop36.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop36 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop36_f77 floop36_f77_ - #define floop36_f77overhead floop36_f77overhead_ - #define floop36_f90 floop36_f90_ - #define floop36_f90overhead floop36_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop36_f77 floop36_f77__ - #define floop36_f77overhead floop36_f77overhead__ - #define floop36_f90 floop36_f90__ - #define floop36_f90overhead floop36_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop36_f77 FLOOP36_F77 - #define floop36_f77overhead FLOOP36_F77OVERHEAD - #define floop36_f90 FLOOP36_F90 - #define floop36_f90overhead FLOOP36_F90OVERHEAD -#endif - -extern "C" { - void floop36_f77(const int& N, float* x, float* e); - void floop36_f77overhead(const int& N, float* x, float* e); - void floop36_f90(const int& N, float* x, float* e); - void floop36_f90overhead(const int& N, float* x, float* e); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); 
-#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop36: $x = exp($e)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop36.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector e(N); - 
initializeRandomDouble(e.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+1); - 
Array e(efill(Range(1,N))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+2); - Array e(efill(Range(1,N+1-1))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray e(N); - initializeRandomDouble(e, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = 
new float[N]; - initializeRandomDouble(x, N); - float* e = new float[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop36_f77(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop36_f77overhead(N, x, e); - - bench.stopOverhead(); - - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* e = new float[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop36_f90(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop36_f90overhead(N, x, e); - - bench.stopOverhead(); - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop36f.f b/benchmarks/floop36f.f deleted file mode 100644 index 5850e759..00000000 --- a/benchmarks/floop36f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop36_F77(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - DO i=1,N - x(i) = exp(e(i)); - END DO - RETURN - END - - - SUBROUTINE floop36_F77Overhead(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - RETURN - END diff --git a/benchmarks/floop36f90.f90 b/benchmarks/floop36f90.f90 deleted file mode 100644 index ba4b5586..00000000 --- a/benchmarks/floop36f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop36_F90(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - x = exp(e) - RETURN - END - - - SUBROUTINE floop36_F90Overhead(N, x, e) - INTEGER i, N - REAL*4 x(N), e(N) - - RETURN - END diff --git a/benchmarks/floop3f.f b/benchmarks/floop3f.f deleted file mode 100644 index 
22b53534..00000000 --- a/benchmarks/floop3f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop3_F77(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - DO i=1,N - y(i) = y(i) + a*x(i); - END DO - RETURN - END - - - SUBROUTINE floop3_F77Overhead(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - RETURN - END diff --git a/benchmarks/floop3f90.f90 b/benchmarks/floop3f90.f90 deleted file mode 100644 index 17c976a4..00000000 --- a/benchmarks/floop3f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop3_F90(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - y = y + a*x - RETURN - END - - - SUBROUTINE floop3_F90Overhead(N, x, y, a) - INTEGER i, N - REAL*4 x(N), y(N), a - - RETURN - END diff --git a/benchmarks/floop5.cpp b/benchmarks/floop5.cpp deleted file mode 100644 index 23e69c6e..00000000 --- a/benchmarks/floop5.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// floop5 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop5_f77 floop5_f77_ - #define floop5_f77overhead floop5_f77overhead_ - #define floop5_f90 floop5_f90_ - #define floop5_f90overhead floop5_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop5_f77 floop5_f77__ - #define floop5_f77overhead floop5_f77overhead__ - #define floop5_f90 floop5_f90__ - #define floop5_f90overhead floop5_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop5_f77 FLOOP5_F77 - #define floop5_f77overhead FLOOP5_F77OVERHEAD - #define floop5_f90 FLOOP5_F90 - #define floop5_f90overhead FLOOP5_F90OVERHEAD -#endif - -extern "C" { - void floop5_f77(const int& N, float* x, float* a, float* b); - void floop5_f77overhead(const int& N, float* x, float* a, float* b); 
- void floop5_f90(const int& N, float* x, float* a, float* b); - void floop5_f90overhead(const int& N, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop5: $x = $a+$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop5.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = 
rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), 
N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef 
BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop5_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop5_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - floop5_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop5_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop5f.f b/benchmarks/floop5f.f deleted file mode 100644 index fca4a10a..00000000 --- a/benchmarks/floop5f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop5_F77(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); - END DO - RETURN - END - - - SUBROUTINE floop5_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop5f90.f90 b/benchmarks/floop5f90.f90 deleted file mode 100644 index f93915c7..00000000 --- a/benchmarks/floop5f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop5_F90(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - x = a+b - RETURN - END - - - SUBROUTINE floop5_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop6.cpp b/benchmarks/floop6.cpp deleted file mode 100644 index 59710fe2..00000000 --- a/benchmarks/floop6.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// floop6 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop6_f77 floop6_f77_ - #define floop6_f77overhead floop6_f77overhead_ - #define floop6_f90 floop6_f90_ - #define floop6_f90overhead floop6_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop6_f77 floop6_f77__ - #define floop6_f77overhead floop6_f77overhead__ - #define floop6_f90 floop6_f90__ - #define floop6_f90overhead 
floop6_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop6_f77 FLOOP6_F77 - #define floop6_f77overhead FLOOP6_F77OVERHEAD - #define floop6_f90 FLOOP6_F90 - #define floop6_f90overhead FLOOP6_F90OVERHEAD -#endif - -extern "C" { - void floop6_f77(const int& N, float* x, float* a, float* b); - void floop6_f77overhead(const int& N, float* x, float* a, float* b); - void floop6_f90(const int& N, float* x, float* a, float* b); - void floop6_f90overhead(const int& N, float* x, float* a, float* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop6: $x = $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - 
F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop6.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void 
ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - 
initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop6_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop6_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while 
(!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* x = new float[N]; - initializeRandomDouble(x, N); - float* a = new float[N]; - initializeRandomDouble(a, N); - float* b = new float[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop6_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop6_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop6f.f b/benchmarks/floop6f.f deleted file mode 100644 index 44e34043..00000000 --- a/benchmarks/floop6f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop6_F77(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE floop6_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/floop6f90.f90 b/benchmarks/floop6f90.f90 deleted file mode 100644 index 12bb2e5b..00000000 --- a/benchmarks/floop6f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop6_F90(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - x = a*b - RETURN - END - - - SUBROUTINE floop6_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*4 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/floop8.cpp b/benchmarks/floop8.cpp deleted file mode 100644 index afc2e678..00000000 --- a/benchmarks/floop8.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// floop8 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if 
defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop8_f77 floop8_f77_ - #define floop8_f77overhead floop8_f77overhead_ - #define floop8_f90 floop8_f90_ - #define floop8_f90overhead floop8_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop8_f77 floop8_f77__ - #define floop8_f77overhead floop8_f77overhead__ - #define floop8_f90 floop8_f90__ - #define floop8_f90overhead floop8_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop8_f77 FLOOP8_F77 - #define floop8_f77overhead FLOOP8_F77OVERHEAD - #define floop8_f90 FLOOP8_F90 - #define floop8_f90overhead FLOOP8_F90OVERHEAD -#endif - -extern "C" { - void floop8_f77(const int& N, float* a, float* x, const float& u); - void floop8_f77overhead(const int& N, float* a, float* x, const float& u); - void floop8_f90(const int& N, float* a, float* x, const float& u); - void floop8_f90overhead(const int& N, float* a, float* x, const float& u); - -} - -void VectorVersion(BenchmarkExt& bench, float u); -void ArrayVersion(BenchmarkExt& bench, float u); -void ArrayVersion_unaligned(BenchmarkExt& bench, float u); -void ArrayVersion_misaligned(BenchmarkExt& bench, float u); -void ArrayVersion_index(BenchmarkExt& bench, float u); -void doTinyVectorVersion(BenchmarkExt& bench, float u); -void F77Version(BenchmarkExt& bench, float u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop8: $x = u/$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - 
parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - float u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop8.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N 
= bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Array 
(misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop8_f77(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop8_f77overhead(N, a, x, u); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - 
bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, float u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop8_f90(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop8_f90overhead(N, a, x, u); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop8f.f b/benchmarks/floop8f.f deleted file mode 100644 index 709e3dc6..00000000 --- a/benchmarks/floop8f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop8_F77(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - DO i=1,N - x(i) = u/a(i); - END DO - RETURN - END - - - SUBROUTINE floop8_F77Overhead(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - RETURN - END diff --git a/benchmarks/floop8f90.f90 b/benchmarks/floop8f90.f90 deleted file mode 100644 index 12467c9f..00000000 --- a/benchmarks/floop8f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop8_F90(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - x = u/a - RETURN - END - - - SUBROUTINE floop8_F90Overhead(N, a, x, u) - INTEGER i, N - REAL*4 a(N), x(N), u - - RETURN - END diff --git a/benchmarks/floop9.cpp b/benchmarks/floop9.cpp deleted file mode 100644 index 3b8d95f3..00000000 --- a/benchmarks/floop9.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// floop9 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace 
blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define floop9_f77 floop9_f77_ - #define floop9_f77overhead floop9_f77overhead_ - #define floop9_f90 floop9_f90_ - #define floop9_f90overhead floop9_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define floop9_f77 floop9_f77__ - #define floop9_f77overhead floop9_f77overhead__ - #define floop9_f90 floop9_f90__ - #define floop9_f90overhead floop9_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define floop9_f77 FLOOP9_F77 - #define floop9_f77overhead FLOOP9_F77OVERHEAD - #define floop9_f90 FLOOP9_F90 - #define floop9_f90overhead FLOOP9_F90OVERHEAD -#endif - -extern "C" { - void floop9_f77(const int& N, float* a, float* x); - void floop9_f77overhead(const int& N, float* a, float* x); - void floop9_f90(const int& N, float* a, float* x); - void floop9_f90overhead(const int& N, float* a, float* x); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("floop9: $x = $x+$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = 
where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("floop9.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - 
Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i)+a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - 
Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop9_f77(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop9_f77overhead(N, a, x); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - float* a = new float[N]; - initializeRandomDouble(a, N); - float* x = new float[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - floop9_f90(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - floop9_f90overhead(N, a, x); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/floop9f.f b/benchmarks/floop9f.f deleted file mode 100644 index c6f00d9f..00000000 --- a/benchmarks/floop9f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE floop9_F77(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - DO i=1,N - x(i) = x(i)+a(i); - END DO - RETURN - END - - - SUBROUTINE floop9_F77Overhead(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - RETURN - END diff --git a/benchmarks/floop9f90.f90 b/benchmarks/floop9f90.f90 deleted file mode 100644 index 8055aa94..00000000 --- a/benchmarks/floop9f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE floop9_F90(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - x = x+a - RETURN - END - - - SUBROUTINE floop9_F90Overhead(N, a, x) - INTEGER i, N - REAL*4 a(N), x(N) - - RETURN - END diff --git a/benchmarks/frek.m b/benchmarks/frek.m deleted file mode 100644 index efe9885e..00000000 --- a/benchmarks/frek.m +++ /dev/null @@ -1,7 +0,0 @@ -function frek(Mf) - A = zeros(1,4); - A = max(Mf); - A./A(1,3) - A = Mf(19,:); - A./A(1,3) - diff --git a/benchmarks/haney.cpp b/benchmarks/haney.cpp deleted file mode 100644 index 9226272a..00000000 --- a/benchmarks/haney.cpp +++ /dev/null @@ -1,207 +0,0 @@ -// Haney's induction calculation benchmark. -// -// See: Scott W. Haney, Is C++ Fast Enough for Scientific Computing? -// Computers in Physics Vol. 8 No. 6 (1994), p. 690 -// -// Arch D. Robison, C++ Gets Faster for Scientific Computing, -// Computers in Physics Vol. 10 No. 5 (1996), p. 
458 -// - -#include -#include -#include -#ifdef BZ_HAVE_STD -#include -#else -#include -#endif - -using namespace blitz; - -#ifndef M_PI - #define M_PI 3.14159265358979323846 -#endif - -#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES -#define vecopsf vecopsf_ -#define vecopsfo vecopsfo_ -#endif - -extern "C" -{ - void vecopsf(float *li, const float *R, const float *w, const int &N, - const int& iters); - void vecopsfo(float *li, const float *R, const float *w, const int &N, - const int& iters); -} - -inline float sqr(float x) -{ - return x*x; -} - -const float Mu0 = 4.0 * M_PI * 1.0e-7; - -void HaneyCVersion(BenchmarkExt& bench); -void HaneyFortranVersion(BenchmarkExt& bench); -void HaneyBlitzVersion(BenchmarkExt& bench); - -int main() -{ - BenchmarkExt bench("Haney Inductance Calculation", 3); - - bench.setRateDescription("Operations/s"); - - bench.beginBenchmarking(); - - HaneyCVersion(bench); - HaneyFortranVersion(bench); - HaneyBlitzVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("haney.m"); - - return 0; -} - -void initializeRandom(float* data, int length) -{ - Random unif(1.0, 2.0); - for (int i=0; i < length; ++i) - data[i] = unif.random(); -} - -void HaneyCVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Inlined C"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - long iters = bench.getIterations(); - - cout << "length = " << length << " iters = " << iters << endl; - - float* li = new float[length]; - float* R = new float[length]; - float* w = new float[length]; - - initializeRandom(li, length); - initializeRandom(R, length); - initializeRandom(w, length); - - // Tickle the cache - for (int i=0; i < length; ++i) - li[i] = R[i] + log(w[i]); - - bench.start(); - - for (long j=0; j < iters; ++j) { - for (int i=0; i < length; ++i) { - li[i] = Mu0 * R[i] * - (0.5 * (1.0 + (1.0/24.0) - * sqr(w[i]/R[i])) * log(32.0 * sqr(R[i]/w[i])) - + 0.05 * sqr(w[i]/R[i]) - 0.85); - } - } - - 
bench.stop(); - - // Subtract the loop overhead - bench.startOverhead(); - - for (long j=0; j < iters; ++j) {} - - - - bench.stopOverhead(); - - delete [] li; - delete [] w; - delete [] R; - } - - bench.endImplementation(); -} - -void HaneyFortranVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - cout << "length = " << length << " iters = " << iters << endl; - - float* li = new float[length]; - float* R = new float[length]; - float* w = new float[length]; - - initializeRandom(li, length); - initializeRandom(R, length); - initializeRandom(w, length); - - // Tickle - int oneIter = 1; - vecopsf(li, R, w, length, oneIter); - - // Time - bench.start(); - vecopsf(li, R, w, length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - vecopsfo(li, R, w, length, iters); - bench.stopOverhead(); - - delete [] li; - delete [] w; - delete [] R; - } - - bench.endImplementation(); -} - -void HaneyBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector li(length), R(length), w(length); - initializeRandom(li.data(), length); - initializeRandom(R.data(), length); - initializeRandom(w.data(), length); - - cout << "length = " << length << " iters = " << iters << endl; - - // Tickle - li = w + log(R); - - // Time - bench.start(); - for (long i=0; i < iters; ++i) { -#if defined(__GNUC__) && (__GNUC__ < 3) - li = Mu0 * R * ( (0.5 + (0.5/24.0) * sqr(w/R) ) - * log(32.0 * sqr(R/w)) + 0.05 * sqr(w/R) - 0.85); -#else - li = Mu0 * R * (0.5 * (1.0 + (1.0/24.0) * sqr(w/R)) - * log(32.0 * sqr(R/w)) + 0.05 * sqr(w/R) - 0.85); -#endif - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - } - bench.stopOverhead(); - } - 
- bench.endImplementation(); -} - diff --git a/benchmarks/haneyf.f b/benchmarks/haneyf.f deleted file mode 100644 index 4731d716..00000000 --- a/benchmarks/haneyf.f +++ /dev/null @@ -1,87 +0,0 @@ -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -C -C AUTHOR -C Scott Haney, LLNL, swhaney@llnl.gov, (510) 423-6308 -C -C COPYRIGHT -C Copyright (c) 1995. The Regents of the University of California. -C All rights reserved. -C -C Permission to use, copy, modify, and distribute this software for any -C purpose without fee is hereby granted, provided that this entire -C notice is included in all copies of any software which is or includes -C a copy or modification of this software and in all copies of the -C a copy or modification of this software and in all copies of the -C supporting documentation for such software. -C -C This work was produced at the University of California, Lawrence -C Livermore National Laboratory under contract no. W-7405-ENG-48 between -C the U.S. Department of Energy and The Regents of the University of -C California for the operation of UC LLNL. -C -C DISCLAIMER -C This software was prepared as an account of work sponsored by an -C agency of the United States Government. Neither the United States -C Government nor the University of California nor any of their -C employees, makes any warranty, express or implied, or assumes any -C liability or responsibility for the accuracy, completeness, or -C usefulness of any information, apparatus, product, or process -C disclosed, or represents that its use would not infringe -C privately-owned rights. Reference herein to any specific commercial -C products, process, or service by trade name, trademark, manufacturer, -C privately-owned rights. 
Reference herein to any specific commercial -C products, process, or service by trade name, trademark, manufacturer, -C or otherwise, does not necessarily constitute or imply its -C endorsement, recommendation, or favoring by the United States -C Government or the University of California. The views and opinions of -C authors expressed herein do not necessarily state or reflect those of -C the United States Government or the University of California, and -C shall not be used for advertising or product endorsement purposes. -C -C -C CREATED -C July 22, 1994 -C -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - subroutine vecopsf(li, R, a, n, iters) - - integer i - real li(n),R(n),a(n) - real xmu0, xm24 - parameter(xmu0 = 4.0 * 3.1415926535 * 1.0e-7) - parameter(xm24 = 1.0 / 24.0) - - do 2000 j = 1, iters - do 1000 i = 1, n - li(i) = xmu0 * R(i) * - . (0.5 * (1.0 + xm24 * (a(i) / R(i))**2) * - . log(32.0 * (R(i) / a(i))**2) + - . 0.84 * (a(i) / R(i))**2 - 0.2) - - 1000 continue - 2000 continue - - return - end - -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC -C -C Overhead timing -C Added January 1997, Todd Veldhuizen -C -CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - - subroutine vecopsfo(li, R, a, n, iters) - - integer i - real li(n),R(n),a(n) - real xmu0, xm24 - parameter(xmu0 = 4.0 * 3.1415926535 * 1.0e-7) - parameter(xm24 = 1.0 / 24.0) - - do 2000 j = 1, iters - 2000 continue - - return - end diff --git a/benchmarks/hao-he-mark.cpp b/benchmarks/hao-he-mark.cpp deleted file mode 100644 index b6c9577a..00000000 --- a/benchmarks/hao-he-mark.cpp +++ /dev/null @@ -1,56 +0,0 @@ -struct Complex { - - Complex(double _re, double _im) - { - re = _re; - im = _im; - } - - Complex& operator+=(const Complex& a) - { - re += a.re; - im += a.im; - } - - Complex operator*(const Complex& a) const - { - return Complex(re*a.re-im*a.im, re*a.im+im*a.re); - } - - double real() const { return re; } - double imag() const { return im; } - - 
double re, im; -}; - -#if 0 -inline Complex sqr1(const Complex& a) -{ - return Complex(a.real() * a.real() - a.imag() * a.imag(), - 2 * a.real() * a.imag()); -} - -inline Complex sqr2(const Complex& a) -{ - return Complex(a.re * a.re - a.im * a.im, 2 * a.re * a.im); -} - -inline Complex sqr3(const Complex& a) -{ - double r = a.re; - double i = a.im; - return Complex(r*r-i*i, 2*r*i); -} -#endif - -void foo(Complex& a, const Complex& b) -{ - a += sqr(b); -} - -void foo2(Complex* __restrict__ a, Complex* __restrict__ b, int N) -{ - for (int i=0; i < N; ++i) - a[i] += sqr(b[i]); -} - diff --git a/benchmarks/hao-he.cpp b/benchmarks/hao-he.cpp deleted file mode 100644 index 601fb989..00000000 --- a/benchmarks/hao-he.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * This benchmark times the performance of B += sqr(A), where A and B - * are complex arrays. - * - * Note: need to use -mv8 for SPARC v8. - */ - -#include -#include - -using namespace blitz; - -#include - -typedef Array,1> CArray; - -void setup(Array,1>& A, Array,1>& B) -{ - int n = A.extent(firstDim); - - for (int i=0; i < n; ++i) - { - double x=-10.+20./(n-1.0)*i; - A(i)=sin(x); - B(i)=sin(x); - } -} - -void version1(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - // Array notation - setup(A, B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - B += A*A; - } - timer.stop(); - cout << "Time using array notation b += a*a: " << timer.elapsedSeconds() - << endl; -} - -void version2(CArray& A, CArray& B, int nIters) -{ - // Array notation, using sqr(a) - Timer timer; - - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - B += sqr(A); - } - timer.stop(); - cout << "Time using array notation b += sqr(a): " << timer.elapsedSeconds() - << endl; -} - -void version2c(CArray& A, CArray& B) -{ - B += sqr(A); -} - -void version2b(CArray& A, CArray& B, int nIters) -{ - // Array notation, using sqr(a) - Timer timer; - - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - 
version2c(A,B); - } - timer.stop(); - cout << "Time using array notation b += sqr(a): " << timer.elapsedSeconds() - << endl; -} - -void version3(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - int N = A.extent(firstDim); - - // Low-level implementation - setup(A,B); - timer.start(); - for (int i=0; i < nIters; ++i) - { - for (int j=0; j < N; ++j) - B(j) += A(j) * A(j); - } - timer.stop(); - cout << "Time using low-level version: " << timer.elapsedSeconds() - << endl; -} - -void version4(CArray& A, CArray& B, int nIters) -{ - Timer timer; - - struct cmplx { - double re, im; - }; - cmplx* a = (cmplx*)A.data(); - cmplx* b = (cmplx*)B.data(); - setup(A,B); - int N = A.extent(firstDim); - - timer.start(); - for (int i=0; i < nIters; ++i) - { - for (int j=0; j < N; ++j) - { - double ar = a[j].re; - double ai = a[j].im; - b[j].re += ar*ar - ai*ai; - b[j].im += 2 * ar * ai; - } - } - timer.stop(); - cout << "Time using really low-level version: " << timer.elapsedSeconds() - << endl; -} - -int run(int N, int nIters) -{ - Array,1> A(N), B(N); - - version1(A,B,nIters); - version2(A,B,nIters); - version2b(A,B,nIters); - version3(A,B,nIters); - version4(A,B,nIters); - - return 0; -} - -int main() -{ - cout << "In-cache:" << endl; - run(256,39063); - - cout << endl << "Out-of-cache:" << endl; - run(1000000,10); -} - diff --git a/benchmarks/iter.cpp b/benchmarks/iter.cpp deleted file mode 100644 index 352bb602..00000000 --- a/benchmarks/iter.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include -#include -#include - -int main() { - using namespace blitz; - - typedef Array Image; - Image A(512,512,512); - Timer timer; - - A = 0.0; - timer.start(); - for (unsigned j=0;j<10;++j) { - for (Image::iterator i=A.begin(),end=A.end();i!=end;++i) { - const TinyVector pos = i.position(); - *i += pos(0)+pos(1)+pos(2); - } - } - timer.stop(); - double flops = 10.0*512*512*512*2; - double seconds = timer.elapsed(); - - double timePerOp = seconds / flops; - - cout << "ops = " << flops << endl - 
<< "seconds = " << seconds << endl; - - double Mflops = flops / seconds / 1.0e+6; - cout << "Mflops = " << Mflops << endl; - - return 0; -} diff --git a/benchmarks/kepler.cpp b/benchmarks/kepler.cpp deleted file mode 100644 index b3b2d791..00000000 --- a/benchmarks/kepler.cpp +++ /dev/null @@ -1,243 +0,0 @@ -#include -#include - -using namespace blitz; - -/* - * Test a 12th order symmetric multistep method for solving the equations - * of motion of a single planet circling the Sun. The Sun is fixed in - * space. - * - * Original F77 version written by John K. Prentice, Quetzal Computational - * Associates, 21 Decmber 1992 - * - * Blitz++ version by Todd Veldhuizen, 17 August 1997 - * The C++ version is a faithful translation of the Fortran 90 version, - * so apologies for the "C++Tran" style. - */ - -inline double relativeError(double a, double b) -{ - if (b != 0.0) - return (a - b) / b; - else - return a; -} - -int main() -{ - Vector x_position_numerical(13), y_position_numerical(13), - alpha(13), beta(13), gamma(13), x_acceleration(13), y_acceleration(13); - - /* - * 12th order symmetric method coefficients - * - * Reference: "Symmetric Multistep Methods for the Numerical - * Integration of Planetary Orbits", G. D. Quinlan and - * S. Tremaine, The Astronomical Journal, 100 (1990), page 1695. - * - * Note!! The beta below are actually 53,222,400 times the - * real beta. This common factor is divided out in the - * symmetric multistep calculation itself, in order to minimize - * round-off - */ - - const double beta_factor = 53222400.0; - alpha = 1.0, -2.0, 2.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0, 2.0, -2.0, 1.0; - beta = 0.0, 90987349.0, -229596838.0, 812627169.0, -1628539944.0, - 2714971338.0, -3041896548.0, 2714971338.0, -1628539944.0, - 812627169.0, -229596838.0, 90987349.0, 0.0; - - /* - * 12th order Cowell predictor coefficients - * - * Reference: "Astronomical Papers Prepared for the Use of the - * American Ephemeris and Nautical Almanac", C. J. Cohen, E. C. 
- * Hubbard, and C. Oesterwinter, 22 (1973), page 20-21. - * - * Note!! The gamma below are actually 1,743,565,824,000 times - * the real gamma. This common factor is divided out in the - * Cowell predictor calculation itself, in order to minimize - * round-off - */ - - const double gamma_factor = 1743565824000.0; - gamma = 9072652009253.0, -39726106418680.0, 140544566352762.0, - -344579280210129.0, 613137294629235.0, -811345852376496.0, - 807012356281740.0, -602852367932304.0, 333888089374395.0, - -133228219027160.0, 36262456774618.0, -6033724094760.0, - 463483373517.0; - - // Initialize variables - - const double time_step = 0.25, - stop_time = 365000.0, - radius = 1.0; - double time = - time_step; - - cout << " Position solution via 12th order symmetric multistep method\n" - << " Velocity solution via 12th order Cowell predictor method\n" - << " radius = " << radius << ", time step = " << time_step - << endl; - - // Define a constant which is needed later by the exact solution - const double gaussian_constant = 0.01720209895; - const double gravitational_constant = pow(gaussian_constant,2); - const double constant = sqrt(gravitational_constant/pow(radius,3)); - - // Initialize the first 12 numerical values using the exact values - - double x_position_exact, y_position_exact; - - for (int j=-1; j <= 11; ++j) - { - if (j >= 0) - time += time_step; - - x_position_exact = radius * cos(constant * time); - y_position_exact = radius * sin(constant * time); - - if (j >= 0) - { - x_position_numerical(j) = x_position_exact; - y_position_numerical(j) = y_position_exact; - } - - x_acceleration(j+1) = -gravitational_constant/pow(radius,3) - * x_position_exact; - y_acceleration(j+1) = -gravitational_constant/pow(radius,3) - * y_position_exact; - } - - /* - * Compute exact kinetic and potential energies, and the - * angular momentum. These values are all divided by the mass - * of the object. 
Since they are conserved, they will never change - * and hence do not have to be recalculated later. - */ - - double x_dot_exact = -radius * constant * sin(constant*time), - y_dot_exact = radius * constant * cos(constant*time), - exact_velocity_squared = pow(x_dot_exact,2) + pow(y_dot_exact,2), - exact_kinetic_energy = 0.5 * exact_velocity_squared, - exact_potential_energy = -gravitational_constant / radius, - exact_total_energy = exact_potential_energy + exact_kinetic_energy, - exact_angular_momentum = x_position_exact * y_dot_exact - - y_position_exact * x_dot_exact; - - double x_dot_numerical, y_dot_numerical; - - // Perform loop over time - - while (time <= stop_time) - { - // Advance time step (eek!) - time += time_step; - - // Calculate new acceleration of body at time=time-time_step - double numerical_radius_squared = pow(x_position_numerical(11),2) - + pow(y_position_numerical(11),2); - x_acceleration(12) = -gravitational_constant - / pow(numerical_radius_squared, 1.5) * x_position_numerical(11); - y_acceleration(12) = -gravitational_constant - / pow(numerical_radius_squared, 1.5) * y_position_numerical(11); - - // Numerically solve for the new positions using a 12th order - // symmetric multistep method. - - // First sum the first and second terms - - double x_alpha_sum = dot(alpha(Range(0,11)), - x_position_numerical(Range(0,11))); - double y_alpha_sum = dot(alpha(Range(0,11)), - y_position_numerical(Range(0,11))); - - double x_beta_sum = dot(beta(Range(0,11)), x_acceleration(Range(1,12))); - double y_beta_sum = dot(beta(Range(0,11)), y_acceleration(Range(1,12))); - x_position_numerical(12) = (-x_alpha_sum) + pow(time_step,2) - * (x_beta_sum / beta_factor); - y_position_numerical(12) = (-y_alpha_sum) + pow(time_step,2) - * (y_beta_sum / beta_factor); - - // Numerically solve for the new velocities using a 12th order - // Cowell predictor method. 
- - // First sum the gamma terms - - double x_gamma_sum = dot(gamma, x_acceleration.reverse()), - y_gamma_sum = dot(gamma, y_acceleration.reverse()); - - x_dot_numerical = (x_position_numerical(11) - - x_position_numerical(10)) / time_step + time_step - * (x_gamma_sum / gamma_factor); - y_dot_numerical = (y_position_numerical(11) - - y_position_numerical(10)) / time_step + time_step - * (y_gamma_sum / gamma_factor); - - // Push the stack down one - - for (int j=0; j <= 11; ++j) - { - x_position_numerical(j) = x_position_numerical(j+1); - y_position_numerical(j) = y_position_numerical(j+1); - x_acceleration(j) = x_acceleration(j+1); - y_acceleration(j) = y_acceleration(j+1); - } - } - - // Print results - - // First compute energies and angular momenta (add divided by the mass - // of the object) - - double numerical_velocity_squared = pow(x_dot_numerical,2) + - pow(y_dot_numerical,2), - numerical_radius = sqrt(pow(x_position_numerical(12),2) - + pow(y_position_numerical(12),2)), - numerical_kinetic_energy = 0.5 * numerical_velocity_squared, - numerical_potential_energy = -gravitational_constant - / numerical_radius, - numerical_total_energy = numerical_potential_energy - + numerical_kinetic_energy, - numerical_angular_momentum = x_position_numerical(12) - * y_dot_numerical - y_position_numerical(12) * x_dot_numerical; - - // Compute exact results for comparison to the numerical results - - x_position_exact = radius * cos(constant * time); - y_position_exact = radius * sin(constant * time); - x_dot_exact = -radius * constant * sin(constant * time); - y_dot_exact = radius * constant * cos(constant * time); - - // Next compute relative errors - - double radius_error = relativeError(numerical_radius, radius), - x_error = relativeError(x_position_numerical(12), x_position_exact), - y_error = relativeError(y_position_numerical(12), y_position_exact), - x_dot_error = relativeError(x_dot_numerical, x_dot_exact), - y_dot_error = relativeError(y_dot_numerical, y_dot_exact); 
- - double kinetic_energy_error = relativeError(numerical_kinetic_energy, - exact_kinetic_energy), - potential_energy_error = relativeError(numerical_potential_energy, - exact_potential_energy), - total_energy_error = relativeError(numerical_total_energy, - exact_total_energy), - angular_momentum_error = relativeError(numerical_angular_momentum, - exact_angular_momentum); - - cout << " Time = " << time << endl - << " x rel error = " << x_error << " y rel error = " << y_error - << endl - << " vx rel error = " << x_dot_error << " vy rel error = " - << y_dot_error << endl - << " KE rel error = " << kinetic_energy_error - << " PE rel error = " << potential_energy_error << endl - << " TE rel error = " << total_energy_error << " AM rel error = " - << angular_momentum_error << endl - << " numerical radius = " << numerical_radius - << " radius rel error = " << radius_error << endl; - - return 0; -} - diff --git a/benchmarks/loop1-bug.cpp b/benchmarks/loop1-bug.cpp deleted file mode 100644 index c136ff06..00000000 --- a/benchmarks/loop1-bug.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// In KAI C++ 3.2, restrict causes problems for copy propagation. 
-// Temporary fix: disable restrict - -#define BZ_DISABLE_RESTRICT - -#include - -using namespace blitz; - -template void sink(T&) { } - -void foo(int N) -{ - Vector x(N); - Vector y(N); - - x=sqrt(y); - - sink(x); - sink(y); -} - diff --git a/benchmarks/loop1.cpp b/benchmarks/loop1.cpp deleted file mode 100644 index a23a2a14..00000000 --- a/benchmarks/loop1.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop1 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop1_f77 loop1_f77_ - #define loop1_f77overhead loop1_f77overhead_ - #define loop1_f90 loop1_f90_ - #define loop1_f90overhead loop1_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop1_f77 loop1_f77__ - #define loop1_f77overhead loop1_f77overhead__ - #define loop1_f90 loop1_f90__ - #define loop1_f90overhead loop1_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop1_f77 LOOP1_F77 - #define loop1_f77overhead LOOP1_F77OVERHEAD - #define loop1_f90 LOOP1_F90 - #define loop1_f90overhead LOOP1_F90OVERHEAD -#endif - -extern "C" { - void loop1_f77(const int& N, double* x, double* y); - void loop1_f77overhead(const int& N, double* x, double* y); - void loop1_f90(const int& N, double* x, double* y); - void loop1_f90overhead(const int& N, double* x, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef 
BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop1: $x = sqrt($y)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop1.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - 
initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = sqrt(y); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " 
<< N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop1_f77(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop1_f77overhead(N, x, y); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop1_f90(N, x, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop1_f90overhead(N, x, y); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop10.cpp b/benchmarks/loop10.cpp deleted file mode 100644 index d1c581a1..00000000 --- a/benchmarks/loop10.cpp +++ /dev/null @@ -1,483 +0,0 @@ - -// loop10 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop10_f77 loop10_f77_ - #define loop10_f77overhead loop10_f77overhead_ - #define loop10_f90 loop10_f90_ - #define loop10_f90overhead loop10_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop10_f77 
loop10_f77__ - #define loop10_f77overhead loop10_f77overhead__ - #define loop10_f90 loop10_f90__ - #define loop10_f90overhead loop10_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop10_f77 LOOP10_F77 - #define loop10_f77overhead LOOP10_F77OVERHEAD - #define loop10_f90 LOOP10_F90 - #define loop10_f90overhead LOOP10_F90OVERHEAD -#endif - -extern "C" { - void loop10_f77(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f77overhead(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f90(const int& N, double* x, double* a, double* b, double* c, const double& u); - void loop10_f90overhead(const int& N, double* x, double* a, double* b, double* c, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop10: $x = u+$a+$b+$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); 
- bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop10.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a(tensor::i)+b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - 
initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u+a+b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif 
- -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop10_f77(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop10_f77overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop10_f90(N, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop10_f90overhead(N, x, a, b, c, u); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop100.cpp b/benchmarks/loop100.cpp deleted file mode 100644 index f8f44c86..00000000 --- a/benchmarks/loop100.cpp +++ /dev/null @@ -1,508 +0,0 @@ - -// loop100 generated by makeloops.py Thu 
Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop100_f77 loop100_f77_ - #define loop100_f77overhead loop100_f77overhead_ - #define loop100_f90 loop100_f90_ - #define loop100_f90overhead loop100_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop100_f77 loop100_f77__ - #define loop100_f77overhead loop100_f77overhead__ - #define loop100_f90 loop100_f90__ - #define loop100_f90overhead loop100_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop100_f77 LOOP100_F77 - #define loop100_f77overhead LOOP100_F77OVERHEAD - #define loop100_f90 LOOP100_F90 - #define loop100_f90overhead LOOP100_F90OVERHEAD -#endif - -extern "C" { - void loop100_f77(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f77overhead(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f90(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - void loop100_f90overhead(const int& N, double* a, double* b, double* c, double* d, double* x, const double& u, const double& v, const double& w, const double& z); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w, double z); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w, double z); -void 
ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w, double z); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v, double w, double z); -void F77Version(BenchmarkExt& bench, double u, double v, double w, double z); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w, double z); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w, double z); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop100: $x=(1.0-$c*$c)/((4*w)*sin(1.0+$c*$c-2*v*$c))*$a*$b*u*exp(-z*$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 18 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - double w = 0.39123982498157938742; - double z = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w, z); - ArrayVersion_unaligned(bench, u, v, w, z); - ArrayVersion_misaligned(bench, u, v, w, z); - ArrayVersion_index(bench, u, v, w, z); - //doTinyVectorVersion(bench, u, v, w, z); - F77Version(bench, u, v, w, z); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w, z); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v, w, z); -#endif - - if(runvector) - VectorVersion(bench, u, v, w, z); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop100.m"); - return 0; -} 
- -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - 
sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c(tensor::i)*c(tensor::i))/((4*w)*sin(1.0+c(tensor::i)*c(tensor::i)-2*v*c(tensor::i)))*a(tensor::i)*b(tensor::i)*u*exp(-z*d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+5); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(1,N+1-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(2,N+2-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(3,N+3-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(4,N+4-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray x(N); - initializeRandomDouble(x, N); - 
- - bench.start(); - for (long i=0; i < iters; ++i) - { - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop100_f77(N, a, b, c, d, x, u, v, w, z); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop100_f77overhead(N, a, b, c, d, x, u, v, w, z); - - bench.stopOverhead(); - - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w, double z) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - loop100_f90(N, a, b, c, d, x, u, v, w, z); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop100_f90overhead(N, a, b, c, d, x, u, v, w, z); - - bench.stopOverhead(); - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop100f.f b/benchmarks/loop100f.f deleted file mode 100644 index e5d29204..00000000 --- a/benchmarks/loop100f.f +++ /dev/null @@ -1,18 +0,0 @@ - - SUBROUTINE loop100_F77(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - DO i=1,N - x(i)=(1.0-c(i)*c(i))/((4*w)*sin(1.0+c(i)*c(i)-2*v*c(i)))*a(i - !)*b(i)*u*exp(-z*d(i)); - END DO - RETURN - END - - - SUBROUTINE loop100_F77Overhead(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - RETURN - END diff --git a/benchmarks/loop100f90.f90 b/benchmarks/loop100f90.f90 deleted file mode 100644 index 2df34734..00000000 --- a/benchmarks/loop100f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop100_F90(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - x=(1.0-c*c)/((4*w)*sin(1.0+c*c-2*v*c))*a*b*u*exp(-z*d) - RETURN - END - - - SUBROUTINE loop100_F90Overhead(N, a, b, c, d, x, u, v, w, z) - INTEGER i, N - REAL*8 a(N), b(N), c(N), d(N), x(N), u, v, w, z - - RETURN - END diff --git a/benchmarks/loop10f.f b/benchmarks/loop10f.f deleted file mode 100644 index fb4a71c9..00000000 --- a/benchmarks/loop10f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop10_F77(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = u+a(i)+b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE loop10_F77Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - RETURN - END diff --git a/benchmarks/loop10f90.f90 b/benchmarks/loop10f90.f90 deleted file mode 100644 index 7db1ba9f..00000000 --- 
a/benchmarks/loop10f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop10_F90(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - x = u+a+b+c - RETURN - END - - - SUBROUTINE loop10_F90Overhead(N, x, a, b, c, u) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/loop11.cpp b/benchmarks/loop11.cpp deleted file mode 100644 index 785dd498..00000000 --- a/benchmarks/loop11.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop11 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop11_f77 loop11_f77_ - #define loop11_f77overhead loop11_f77overhead_ - #define loop11_f90 loop11_f90_ - #define loop11_f90overhead loop11_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop11_f77 loop11_f77__ - #define loop11_f77overhead loop11_f77overhead__ - #define loop11_f90 loop11_f90__ - #define loop11_f90overhead loop11_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop11_f77 LOOP11_F77 - #define loop11_f77overhead LOOP11_F77OVERHEAD - #define loop11_f90 LOOP11_F90 - #define loop11_f90overhead LOOP11_F90OVERHEAD -#endif - -extern "C" { - void loop11_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop11_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void 
ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop11: $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop11.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - 
bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - 
initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array 
b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop11_f77(N, x, a, b, c, d); - 
bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop11_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop11_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop11_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop11f.f b/benchmarks/loop11f.f deleted file mode 100644 index 79a2cce1..00000000 --- a/benchmarks/loop11f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop11_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop11_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop11f90.f90 b/benchmarks/loop11f90.f90 deleted file mode 100644 index f8066b3c..00000000 --- a/benchmarks/loop11f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop11_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = a+b+c+d - RETURN - END - - - 
SUBROUTINE loop11_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop12.cpp b/benchmarks/loop12.cpp deleted file mode 100644 index 5a8aaf5f..00000000 --- a/benchmarks/loop12.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// loop12 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop12_f77 loop12_f77_ - #define loop12_f77overhead loop12_f77overhead_ - #define loop12_f90 loop12_f90_ - #define loop12_f90overhead loop12_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop12_f77 loop12_f77__ - #define loop12_f77overhead loop12_f77overhead__ - #define loop12_f90 loop12_f90__ - #define loop12_f90overhead loop12_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop12_f77 LOOP12_F77 - #define loop12_f77overhead LOOP12_F77OVERHEAD - #define loop12_f90 LOOP12_F90 - #define loop12_f90overhead LOOP12_F90OVERHEAD -#endif - -extern "C" { - void loop12_f77(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f90(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop12_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); 
-void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop12: $y = u+$a; $x = $a+$b+$c+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop12.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = 
rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a(tensor::i); x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = u+a; x = a+b+c+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - 
bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop12_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop12_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop12_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop12_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete 
[] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop12f.f b/benchmarks/loop12f.f deleted file mode 100644 index 076c9ebf..00000000 --- a/benchmarks/loop12f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop12_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - y(i) = u+a(i); x(i) = a(i)+b(i)+c(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop12_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/loop12f90.f90 b/benchmarks/loop12f90.f90 deleted file mode 100644 index 479305a6..00000000 --- a/benchmarks/loop12f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop12_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - y = u+a; x = a+b+c+d - RETURN - END - - - SUBROUTINE loop12_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/loop13.cpp b/benchmarks/loop13.cpp deleted file mode 100644 index 7688b5e3..00000000 --- a/benchmarks/loop13.cpp +++ /dev/null @@ -1,527 +0,0 @@ - -// loop13 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop13_f77 loop13_f77_ - #define loop13_f77overhead loop13_f77overhead_ - #define loop13_f90 loop13_f90_ - #define loop13_f90overhead loop13_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop13_f77 loop13_f77__ - #define loop13_f77overhead loop13_f77overhead__ - #define loop13_f90 loop13_f90__ - #define loop13_f90overhead loop13_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) 
- #define loop13_f77 LOOP13_F77 - #define loop13_f77overhead LOOP13_F77OVERHEAD - #define loop13_f90 LOOP13_F90 - #define loop13_f90overhead LOOP13_F90OVERHEAD -#endif - -extern "C" { - void loop13_f77(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f90(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - void loop13_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, double* d, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop13: $x = $a+$b+$c+$d; $y = u+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - 
bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop13.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = 
bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i)+d(tensor::i); y = u+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array 
y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+6); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+6); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(5,N+5-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void 
ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c+d; y = u+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop13_f77(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop13_f77overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - 
while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop13_f90(N, y, x, a, b, c, d, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop13_f90overhead(N, y, x, a, b, c, d, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop13f.f b/benchmarks/loop13f.f deleted file mode 100644 index b9b97814..00000000 --- a/benchmarks/loop13f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop13_F77(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i)+d(i); y(i) = u+d(i); - END DO - RETURN - END - - - SUBROUTINE loop13_F77Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - RETURN - END diff --git a/benchmarks/loop13f90.f90 b/benchmarks/loop13f90.f90 deleted file mode 100644 index dbfa0218..00000000 --- a/benchmarks/loop13f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop13_F90(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - x = a+b+c+d; y = u+d - RETURN - END - - - SUBROUTINE loop13_F90Overhead(N, y, x, a, b, c, d, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), d(N), u - - RETURN - END diff --git a/benchmarks/loop14.cpp b/benchmarks/loop14.cpp deleted file mode 100644 
index ba1cbbea..00000000 --- a/benchmarks/loop14.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// loop14 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop14_f77 loop14_f77_ - #define loop14_f77overhead loop14_f77overhead_ - #define loop14_f90 loop14_f90_ - #define loop14_f90overhead loop14_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop14_f77 loop14_f77__ - #define loop14_f77overhead loop14_f77overhead__ - #define loop14_f90 loop14_f90__ - #define loop14_f90overhead loop14_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop14_f77 LOOP14_F77 - #define loop14_f77overhead LOOP14_F77OVERHEAD - #define loop14_f90 LOOP14_F90 - #define loop14_f90overhead LOOP14_F90OVERHEAD -#endif - -extern "C" { - void loop14_f77(const int& N, double* y, double* x, double* a, double* b); - void loop14_f77overhead(const int& N, double* y, double* x, double* a, double* b); - void loop14_f90(const int& N, double* y, double* x, double* a, double* b); - void loop14_f90overhead(const int& N, double* y, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop14: $x = $a+$b; $y = $a-$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop14.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - 
bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i); y = a(tensor::i)-b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = 
bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; y = a-b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop14_f77(N, y, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop14_f77overhead(N, y, x, a, b); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop14_f90(N, y, x, a, b); - bench.stop(); - - 
bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop14_f90overhead(N, y, x, a, b); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop14f.f b/benchmarks/loop14f.f deleted file mode 100644 index f4c348bd..00000000 --- a/benchmarks/loop14f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop14_F77(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); y(i) = a(i)-b(i); - END DO - RETURN - END - - - SUBROUTINE loop14_F77Overhead(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop14f90.f90 b/benchmarks/loop14f90.f90 deleted file mode 100644 index d513b0ae..00000000 --- a/benchmarks/loop14f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop14_F90(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - x = a+b; y = a-b - RETURN - END - - - SUBROUTINE loop14_F90Overhead(N, y, x, a, b) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/loop15.cpp b/benchmarks/loop15.cpp deleted file mode 100644 index 10d05874..00000000 --- a/benchmarks/loop15.cpp +++ /dev/null @@ -1,482 +0,0 @@ - -// loop15 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop15_f77 loop15_f77_ - #define loop15_f77overhead loop15_f77overhead_ - #define loop15_f90 loop15_f90_ - #define loop15_f90overhead loop15_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop15_f77 loop15_f77__ - #define loop15_f77overhead loop15_f77overhead__ - #define loop15_f90 loop15_f90__ - #define 
loop15_f90overhead loop15_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop15_f77 LOOP15_F77 - #define loop15_f77overhead LOOP15_F77OVERHEAD - #define loop15_f90 LOOP15_F90 - #define loop15_f90overhead LOOP15_F90OVERHEAD -#endif - -extern "C" { - void loop15_f77(const int& N, double* x, double* a, double* b, double* c); - void loop15_f77overhead(const int& N, double* x, double* a, double* b, double* c); - void loop15_f90(const int& N, double* x, double* a, double* b, double* c); - void loop15_f90overhead(const int& N, double* x, double* a, double* b, double* c); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop15: $x = $c + $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - 
//doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop15.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; 
- sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c(tensor::i) + a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array 
(misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+4); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+4); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = c + a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, 
N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop15_f77(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop15_f77overhead(N, x, a, b, c); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop15_f90(N, x, a, b, c); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop15_f90overhead(N, x, a, b, c); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop15f.f b/benchmarks/loop15f.f deleted file mode 100644 index aa3622b5..00000000 --- a/benchmarks/loop15f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop15_F77(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - DO i=1,N - x(i) = c(i) + a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE loop15_F77Overhead(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - RETURN - END diff --git a/benchmarks/loop15f90.f90 b/benchmarks/loop15f90.f90 deleted file mode 100644 index 45c33e18..00000000 --- a/benchmarks/loop15f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop15_F90(N, x, a, b, 
c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - x = c + a*b - RETURN - END - - - SUBROUTINE loop15_F90Overhead(N, x, a, b, c) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N) - - RETURN - END diff --git a/benchmarks/loop16.cpp b/benchmarks/loop16.cpp deleted file mode 100644 index 37e0cd53..00000000 --- a/benchmarks/loop16.cpp +++ /dev/null @@ -1,505 +0,0 @@ - -// loop16 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop16_f77 loop16_f77_ - #define loop16_f77overhead loop16_f77overhead_ - #define loop16_f90 loop16_f90_ - #define loop16_f90overhead loop16_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop16_f77 loop16_f77__ - #define loop16_f77overhead loop16_f77overhead__ - #define loop16_f90 loop16_f90__ - #define loop16_f90overhead loop16_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop16_f77 LOOP16_F77 - #define loop16_f77overhead LOOP16_F77OVERHEAD - #define loop16_f90 LOOP16_F90 - #define loop16_f90overhead LOOP16_F90OVERHEAD -#endif - -extern "C" { - void loop16_f77(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f77overhead(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f90(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - void loop16_f90overhead(const int& N, double* y, double* x, double* a, double* b, double* c, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void 
ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop16: $x = $a+$b+$c; $y = $x+$c+u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop16.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for 
(int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << 
": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i)+c(tensor::i); y = x(tensor::i)+c(tensor::i)+u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+5); - Array 
y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+5); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(4,N+4-1))); - initializeRandomDouble(c.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b+c; y = x+c+u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - 
initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop16_f77(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop16_f77overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop16_f90(N, y, x, a, b, c, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop16_f90overhead(N, y, x, a, b, c, u); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - delete [] c; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop16f.f b/benchmarks/loop16f.f deleted file mode 100644 index f96274db..00000000 --- a/benchmarks/loop16f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop16_F77(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - DO i=1,N - x(i) = a(i)+b(i)+c(i); y(i) = x(i)+c(i)+u; - END DO - RETURN - END - - - SUBROUTINE loop16_F77Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - RETURN - END diff --git 
a/benchmarks/loop16f90.f90 b/benchmarks/loop16f90.f90 deleted file mode 100644 index f81226c2..00000000 --- a/benchmarks/loop16f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop16_F90(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - x = a+b+c; y = x+c+u - RETURN - END - - - SUBROUTINE loop16_F90Overhead(N, y, x, a, b, c, u) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), c(N), u - - RETURN - END diff --git a/benchmarks/loop17.cpp b/benchmarks/loop17.cpp deleted file mode 100644 index 108110b1..00000000 --- a/benchmarks/loop17.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop17 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop17_f77 loop17_f77_ - #define loop17_f77overhead loop17_f77overhead_ - #define loop17_f90 loop17_f90_ - #define loop17_f90overhead loop17_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop17_f77 loop17_f77__ - #define loop17_f77overhead loop17_f77overhead__ - #define loop17_f90 loop17_f90__ - #define loop17_f90overhead loop17_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop17_f77 LOOP17_F77 - #define loop17_f77overhead LOOP17_F77OVERHEAD - #define loop17_f90 LOOP17_F90 - #define loop17_f90overhead LOOP17_F90OVERHEAD -#endif - -extern "C" { - void loop17_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop17_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void 
VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop17: $x = ($a+$b)*($c+$d)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop17.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for 
(int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); 
- initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a(tensor::i)+b(tensor::i))*(c(tensor::i)+d(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array 
afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (a+b)*(c+d); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - 
initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop17_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop17_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop17_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop17_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop17f.f b/benchmarks/loop17f.f deleted file mode 100644 index 795c1f93..00000000 --- a/benchmarks/loop17f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop17_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = (a(i)+b(i))*(c(i)+d(i)); - END DO - RETURN - END - - - SUBROUTINE loop17_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop17f90.f90 b/benchmarks/loop17f90.f90 deleted file mode 100644 index 64beabe2..00000000 --- a/benchmarks/loop17f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - 
SUBROUTINE loop17_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = (a+b)*(c+d) - RETURN - END - - - SUBROUTINE loop17_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop18.cpp b/benchmarks/loop18.cpp deleted file mode 100644 index b7890461..00000000 --- a/benchmarks/loop18.cpp +++ /dev/null @@ -1,462 +0,0 @@ - -// loop18 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop18_f77 loop18_f77_ - #define loop18_f77overhead loop18_f77overhead_ - #define loop18_f90 loop18_f90_ - #define loop18_f90overhead loop18_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop18_f77 loop18_f77__ - #define loop18_f77overhead loop18_f77overhead__ - #define loop18_f90 loop18_f90__ - #define loop18_f90overhead loop18_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop18_f77 LOOP18_F77 - #define loop18_f77overhead LOOP18_F77OVERHEAD - #define loop18_f90 LOOP18_F90 - #define loop18_f90overhead LOOP18_F90OVERHEAD -#endif - -extern "C" { - void loop18_f77(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f77overhead(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f90(const int& N, double* x, double* a, double* b, const double& u, const double& v); - void loop18_f90overhead(const int& N, double* x, double* a, double* b, const double& u, const double& v); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion(BenchmarkExt& bench, double u, double v); -void 
ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v); -void F77Version(BenchmarkExt& bench, double u, double v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop18: $x = (u+$a)*(v+$b)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop18.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < 
numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << 
endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a(tensor::i))*(v+b(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - 
sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = (u+a)*(v+b); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop18_f77(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop18_f77overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters 
= bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop18_f90(N, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop18_f90overhead(N, x, a, b, u, v); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop18f.f b/benchmarks/loop18f.f deleted file mode 100644 index 9bc77997..00000000 --- a/benchmarks/loop18f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop18_F77(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = (u+a(i))*(v+b(i)); - END DO - RETURN - END - - - SUBROUTINE loop18_F77Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/loop18f90.f90 b/benchmarks/loop18f90.f90 deleted file mode 100644 index 1773a4a0..00000000 --- a/benchmarks/loop18f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop18_F90(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - x = (u+a)*(v+b) - RETURN - END - - - SUBROUTINE loop18_F90Overhead(N, x, a, b, u, v) - INTEGER i, N - REAL*8 x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/loop19.cpp b/benchmarks/loop19.cpp deleted file mode 100644 index 10aa3136..00000000 --- a/benchmarks/loop19.cpp +++ /dev/null @@ -1,484 +0,0 @@ - -// loop19 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop19_f77 loop19_f77_ - #define 
loop19_f77overhead loop19_f77overhead_ - #define loop19_f90 loop19_f90_ - #define loop19_f90overhead loop19_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop19_f77 loop19_f77__ - #define loop19_f77overhead loop19_f77overhead__ - #define loop19_f90 loop19_f90__ - #define loop19_f90overhead loop19_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop19_f77 LOOP19_F77 - #define loop19_f77overhead LOOP19_F77OVERHEAD - #define loop19_f90 LOOP19_F90 - #define loop19_f90overhead LOOP19_F90OVERHEAD -#endif - -extern "C" { - void loop19_f77(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f77overhead(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f90(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - void loop19_f90overhead(const int& N, double* y, double* x, double* a, double* b, const double& u, const double& v); - -} - -void VectorVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion(BenchmarkExt& bench, double u, double v); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v); -void F77Version(BenchmarkExt& bench, double u, double v); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop19: 
$x = u*$a; $y = v*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - - - ArrayVersion(bench, u, v); - ArrayVersion_unaligned(bench, u, v); - ArrayVersion_misaligned(bench, u, v); - ArrayVersion_index(bench, u, v); - //doTinyVectorVersion(bench, u, v); - F77Version(bench, u, v); -#ifdef FORTRAN_90 - F90Version(bench, u, v); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u, v); -#endif - - if(runvector) - VectorVersion(bench, u, v); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop19.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector y(N); - initializeRandomDouble(y.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < 
iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a(tensor::i); y = v*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array yfill(N+4); - Array y(yfill(Range(0,N+0-1))); - initializeRandomDouble(y.dataFirst(), N); - - Array xfill(N+4); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+4); - Array a(afill(Range(2,N+2-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+4); - Array b(bfill(Range(3,N+3-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " 
<< N << endl; - - long iters = bench.getIterations(); - - valarray y(N); - initializeRandomDouble(y, N); - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*a; y = v*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop19_f77(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop19_f77overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* y = new double[N]; - initializeRandomDouble(y, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int 
iter=0; iter < iters; ++iter) - loop19_f90(N, y, x, a, b, u, v); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop19_f90overhead(N, y, x, a, b, u, v); - - bench.stopOverhead(); - delete [] y; - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop19f.f b/benchmarks/loop19f.f deleted file mode 100644 index d27ee541..00000000 --- a/benchmarks/loop19f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop19_F77(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - DO i=1,N - x(i) = u*a(i); y(i) = v*b(i); - END DO - RETURN - END - - - SUBROUTINE loop19_F77Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - RETURN - END diff --git a/benchmarks/loop19f90.f90 b/benchmarks/loop19f90.f90 deleted file mode 100644 index ab134129..00000000 --- a/benchmarks/loop19f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop19_F90(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - x = u*a; y = v*b - RETURN - END - - - SUBROUTINE loop19_F90Overhead(N, y, x, a, b, u, v) - INTEGER i, N - REAL*8 y(N), x(N), a(N), b(N), u, v - - RETURN - END diff --git a/benchmarks/loop1f.f b/benchmarks/loop1f.f deleted file mode 100644 index 0f215ed7..00000000 --- a/benchmarks/loop1f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop1_F77(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - DO i=1,N - x(i) = sqrt(y(i)); - END DO - RETURN - END - - - SUBROUTINE loop1_F77Overhead(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - RETURN - END diff --git a/benchmarks/loop1f90.f90 b/benchmarks/loop1f90.f90 deleted file mode 100644 index a788a5af..00000000 --- a/benchmarks/loop1f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop1_F90(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - x = sqrt(y) - RETURN - END - - - SUBROUTINE loop1_F90Overhead(N, x, y) - INTEGER i, N - REAL*8 x(N), y(N) - - RETURN - END diff --git 
a/benchmarks/loop2.cpp b/benchmarks/loop2.cpp deleted file mode 100644 index 1ea3e34e..00000000 --- a/benchmarks/loop2.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop2 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop2_f77 loop2_f77_ - #define loop2_f77overhead loop2_f77overhead_ - #define loop2_f90 loop2_f90_ - #define loop2_f90overhead loop2_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop2_f77 loop2_f77__ - #define loop2_f77overhead loop2_f77overhead__ - #define loop2_f90 loop2_f90__ - #define loop2_f90overhead loop2_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop2_f77 LOOP2_F77 - #define loop2_f77overhead LOOP2_F77OVERHEAD - #define loop2_f90 LOOP2_F90 - #define loop2_f90overhead LOOP2_F90OVERHEAD -#endif - -extern "C" { - void loop2_f77(const int& N, double* x, double* y, const double& u); - void loop2_f77overhead(const int& N, double* x, double* y, const double& u); - void loop2_f90(const int& N, double* x, double* y, const double& u); - void loop2_f90overhead(const int& N, double* x, double* y, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - -const int 
numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop2: $x = $y/u", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop2.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - 
Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y(tensor::i)/u;; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - 
initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = y/u; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop2_f77(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop2_f77overhead(N, x, y, u); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop2_f90(N, x, y, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop2_f90overhead(N, x, y, u); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop21.cpp b/benchmarks/loop21.cpp deleted file mode 100644 index 43d6fd2a..00000000 --- a/benchmarks/loop21.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop21 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop21_f77 loop21_f77_ - #define loop21_f77overhead loop21_f77overhead_ - #define loop21_f90 loop21_f90_ - #define loop21_f90overhead loop21_f90overhead_ -#elif 
defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop21_f77 loop21_f77__ - #define loop21_f77overhead loop21_f77overhead__ - #define loop21_f90 loop21_f90__ - #define loop21_f90overhead loop21_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop21_f77 LOOP21_F77 - #define loop21_f77overhead LOOP21_F77OVERHEAD - #define loop21_f90 LOOP21_F90 - #define loop21_f90overhead LOOP21_F90OVERHEAD -#endif - -extern "C" { - void loop21_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop21_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop21: $x = $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 3 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); 
- bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop21.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - 
initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array 
cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - 
initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop21_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop21_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop21_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; 
++iter) - loop21_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop21f.f b/benchmarks/loop21f.f deleted file mode 100644 index ce6ed7a1..00000000 --- a/benchmarks/loop21f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop21_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE loop21_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop21f90.f90 b/benchmarks/loop21f90.f90 deleted file mode 100644 index bc792a81..00000000 --- a/benchmarks/loop21f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop21_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = a*b + c*d - RETURN - END - - - SUBROUTINE loop21_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop22.cpp b/benchmarks/loop22.cpp deleted file mode 100644 index 60a2cfdb..00000000 --- a/benchmarks/loop22.cpp +++ /dev/null @@ -1,504 +0,0 @@ - -// loop22 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop22_f77 loop22_f77_ - #define loop22_f77overhead loop22_f77overhead_ - #define loop22_f90 loop22_f90_ - #define loop22_f90overhead loop22_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop22_f77 loop22_f77__ - #define loop22_f77overhead loop22_f77overhead__ - #define loop22_f90 loop22_f90__ - #define 
loop22_f90overhead loop22_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop22_f77 LOOP22_F77 - #define loop22_f77overhead LOOP22_F77OVERHEAD - #define loop22_f90 LOOP22_F90 - #define loop22_f90overhead LOOP22_F90OVERHEAD -#endif - -extern "C" { - void loop22_f77(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f90(const int& N, double* x, double* a, double* b, double* c, double* d); - void loop22_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop22: $x = $x + $a*$b + $c*$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - 
ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop22.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array 
c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i) + a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - 
initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+5); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x + a*b + c*d; - sink(); - } - bench.stop(); 
- - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop22_f77(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop22_f77overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop22_f90(N, x, a, b, c, d); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop22_f90overhead(N, x, a, b, c, d); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - 
delete [] d; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop22f.f b/benchmarks/loop22f.f deleted file mode 100644 index 367bce0a..00000000 --- a/benchmarks/loop22f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop22_F77(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - DO i=1,N - x(i) = x(i) + a(i)*b(i) + c(i)*d(i); - END DO - RETURN - END - - - SUBROUTINE loop22_F77Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - RETURN - END diff --git a/benchmarks/loop22f90.f90 b/benchmarks/loop22f90.f90 deleted file mode 100644 index 516f5260..00000000 --- a/benchmarks/loop22f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop22_F90(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - x = x + a*b + c*d - RETURN - END - - - SUBROUTINE loop22_F90Overhead(N, x, a, b, c, d) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N) - - RETURN - END diff --git a/benchmarks/loop23.cpp b/benchmarks/loop23.cpp deleted file mode 100644 index 6ecc89a4..00000000 --- a/benchmarks/loop23.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// loop23 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop23_f77 loop23_f77_ - #define loop23_f77overhead loop23_f77overhead_ - #define loop23_f90 loop23_f90_ - #define loop23_f90overhead loop23_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop23_f77 loop23_f77__ - #define loop23_f77overhead loop23_f77overhead__ - #define loop23_f90 loop23_f90__ - #define loop23_f90overhead loop23_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop23_f77 LOOP23_F77 - #define loop23_f77overhead 
LOOP23_F77OVERHEAD - #define loop23_f90 LOOP23_F90 - #define loop23_f90overhead LOOP23_F90OVERHEAD -#endif - -extern "C" { - void loop23_f77(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f90(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop23_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop23: $x = $a*$b + $c*$d; $y = $b+$d", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 4 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - 
F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop23.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - 
initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i) + c(tensor::i)*d(tensor::i); y = b(tensor::i)+d(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array 
c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - 
initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b + c*d; y = b+d; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop23_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop23_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new 
double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop23_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop23_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop23f.f b/benchmarks/loop23f.f deleted file mode 100644 index 917847f4..00000000 --- a/benchmarks/loop23f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop23_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*b(i) + c(i)*d(i); y(i) = b(i)+d(i); - END DO - RETURN - END - - - SUBROUTINE loop23_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/loop23f90.f90 b/benchmarks/loop23f90.f90 deleted file mode 100644 index b0969fe4..00000000 --- a/benchmarks/loop23f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop23_F90(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*b + c*d; y = b+d - RETURN - END - - - SUBROUTINE loop23_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/loop24.cpp b/benchmarks/loop24.cpp deleted file mode 100644 index ea64f9af..00000000 --- a/benchmarks/loop24.cpp +++ /dev/null @@ -1,526 +0,0 @@ - -// loop24 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - 
-using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop24_f77 loop24_f77_ - #define loop24_f77overhead loop24_f77overhead_ - #define loop24_f90 loop24_f90_ - #define loop24_f90overhead loop24_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop24_f77 loop24_f77__ - #define loop24_f77overhead loop24_f77overhead__ - #define loop24_f90 loop24_f90__ - #define loop24_f90overhead loop24_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop24_f77 LOOP24_F77 - #define loop24_f77overhead LOOP24_F77OVERHEAD - #define loop24_f90 LOOP24_F90 - #define loop24_f90overhead LOOP24_F90OVERHEAD -#endif - -extern "C" { - void loop24_f77(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f90(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - void loop24_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* d, double* y); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop24: $x = $a*$c - $b*$c; $y = $a*$d + $b+$c", numBenchmarks); - - 
bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop24.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector d(N); - initializeRandomDouble(d.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - 
bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array d(N); - initializeRandomDouble(d.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*c(tensor::i) - b(tensor::i)*c(tensor::i); y = a(tensor::i)*d(tensor::i) + b(tensor::i)+c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - 
bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+1); - Array d(dfill(Range(1,N))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+6); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+6); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+6); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+6); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array dfill(N+6); - Array d(dfill(Range(4,N+4-1))); - initializeRandomDouble(d.dataFirst(), N); - - Array yfill(N+6); - Array 
y(yfill(Range(5,N+5-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray d(N); - initializeRandomDouble(d, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*c - b*c; y = a*d + b+c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop24_f77(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int 
iter=0; iter < iters; ++iter) - loop24_f77overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* d = new double[N]; - initializeRandomDouble(d, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop24_f90(N, x, a, b, c, d, y); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop24_f90overhead(N, x, a, b, c, d, y); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] d; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop24f.f b/benchmarks/loop24f.f deleted file mode 100644 index bd66aaca..00000000 --- a/benchmarks/loop24f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop24_F77(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - DO i=1,N - x(i) = a(i)*c(i) - b(i)*c(i); y(i) = a(i)*d(i) + b(i)+c(i); - END DO - RETURN - END - - - SUBROUTINE loop24_F77Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - RETURN - END diff --git a/benchmarks/loop24f90.f90 b/benchmarks/loop24f90.f90 deleted file mode 100644 index 436950a1..00000000 --- a/benchmarks/loop24f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop24_F90(N, x, a, b, 
c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - x = a*c - b*c; y = a*d + b+c - RETURN - END - - - SUBROUTINE loop24_F90Overhead(N, x, a, b, c, d, y) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), d(N), y(N) - - RETURN - END diff --git a/benchmarks/loop25.cpp b/benchmarks/loop25.cpp deleted file mode 100644 index e1ec38f2..00000000 --- a/benchmarks/loop25.cpp +++ /dev/null @@ -1,507 +0,0 @@ - -// loop25 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop25_f77 loop25_f77_ - #define loop25_f77overhead loop25_f77overhead_ - #define loop25_f90 loop25_f90_ - #define loop25_f90overhead loop25_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop25_f77 loop25_f77__ - #define loop25_f77overhead loop25_f77overhead__ - #define loop25_f90 loop25_f90__ - #define loop25_f90overhead loop25_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop25_f77 LOOP25_F77 - #define loop25_f77overhead LOOP25_F77OVERHEAD - #define loop25_f90 LOOP25_F90 - #define loop25_f90overhead LOOP25_F90OVERHEAD -#endif - -extern "C" { - void loop25_f77(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f77overhead(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f90(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - void loop25_f90overhead(const int& N, double* x, double* a, double* b, double* c, double* y, const double& u, const double& v, const double& w); - -} - -void 
VectorVersion(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w); -void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w); -void doTinyVectorVersion(BenchmarkExt& bench, double u, double v, double w); -void F77Version(BenchmarkExt& bench, double u, double v, double w); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop25: $x = u*$b; $y = v*$b + w*$a + u*$c", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 6 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - double v = 0.39123982498157938742; - double w = 0.39123982498157938742; - - - ArrayVersion(bench, u, v, w); - ArrayVersion_unaligned(bench, u, v, w); - ArrayVersion_misaligned(bench, u, v, w); - ArrayVersion_index(bench, u, v, w); - //doTinyVectorVersion(bench, u, v, w); - F77Version(bench, u, v, w); -#ifdef FORTRAN_90 - F90Version(bench, u, v, w); -#endif -#ifdef BENCHMARK_VALARRAY - 
ValarrayVersion(bench, u, v, w); -#endif - - if(runvector) - VectorVersion(bench, u, v, w); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop25.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - Vector c(N); - initializeRandomDouble(c.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - 
initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - Array c(N); - initializeRandomDouble(c.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b(tensor::i); y = v*b(tensor::i) + w*a(tensor::i) + u*c(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+1); - Array c(cfill(Range(1,N))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - 
initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+5); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+5); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+5); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - Array cfill(N+5); - Array c(cfill(Range(3,N+3-1))); - initializeRandomDouble(c.dataFirst(), N); - - Array yfill(N+5); - Array y(yfill(Range(4,N+4-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - valarray c(N); - initializeRandomDouble(c, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for 
(long i=0; i < iters; ++i) - { - x = u*b; y = v*b + w*a + u*c; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop25_f77(N, x, a, b, c, y, u, v, w); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop25_f77overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u, double v, double w) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - double* c = new double[N]; - initializeRandomDouble(c, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop25_f90(N, x, a, b, c, y, u, v, w); - bench.stop(); - - 
bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop25_f90overhead(N, x, a, b, c, y, u, v, w); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - delete [] c; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop25f.f b/benchmarks/loop25f.f deleted file mode 100644 index fac6bac0..00000000 --- a/benchmarks/loop25f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop25_F77(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - DO i=1,N - x(i) = u*b(i); y(i) = v*b(i) + w*a(i) + u*c(i); - END DO - RETURN - END - - - SUBROUTINE loop25_F77Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - RETURN - END diff --git a/benchmarks/loop25f90.f90 b/benchmarks/loop25f90.f90 deleted file mode 100644 index 8835ceed..00000000 --- a/benchmarks/loop25f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop25_F90(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - x = u*b; y = v*b + w*a + u*c - RETURN - END - - - SUBROUTINE loop25_F90Overhead(N, x, a, b, c, y, u, v, w) - INTEGER i, N - REAL*8 x(N), a(N), b(N), c(N), y(N), u, v, w - - RETURN - END diff --git a/benchmarks/loop2f.f b/benchmarks/loop2f.f deleted file mode 100644 index 36d25931..00000000 --- a/benchmarks/loop2f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop2_F77(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - - DO i=1,N - x(i) = y(i)/u; - END DO - RETURN - END - - - SUBROUTINE loop2_F77Overhead(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - RETURN - END diff --git a/benchmarks/loop2f90.f90 b/benchmarks/loop2f90.f90 deleted file mode 100644 index d82513d3..00000000 --- a/benchmarks/loop2f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop2_F90(N, x, y, u) - INTEGER i, N - REAL*8 x(N), y(N), u - - x = y/u - RETURN - END - - - SUBROUTINE loop2_F90Overhead(N, x, y, u) - INTEGER i, N - REAL*8 x(N), 
y(N), u - - RETURN - END diff --git a/benchmarks/loop3.cpp b/benchmarks/loop3.cpp deleted file mode 100644 index ef5bc3a3..00000000 --- a/benchmarks/loop3.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop3 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop3_f77 loop3_f77_ - #define loop3_f77overhead loop3_f77overhead_ - #define loop3_f90 loop3_f90_ - #define loop3_f90overhead loop3_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop3_f77 loop3_f77__ - #define loop3_f77overhead loop3_f77overhead__ - #define loop3_f90 loop3_f90__ - #define loop3_f90overhead loop3_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop3_f77 LOOP3_F77 - #define loop3_f77overhead LOOP3_F77OVERHEAD - #define loop3_f90 LOOP3_F90 - #define loop3_f90overhead LOOP3_F90OVERHEAD -#endif - -extern "C" { - void loop3_f77(const int& N, double* x, double* y, const double& a); - void loop3_f77overhead(const int& N, double* x, double* y, const double& a); - void loop3_f90(const int& N, double* x, double* y, const double& a); - void loop3_f90overhead(const int& N, double* x, double* y, const double& a); - -} - -void VectorVersion(BenchmarkExt& bench, double a); -void ArrayVersion(BenchmarkExt& bench, double a); -void ArrayVersion_unaligned(BenchmarkExt& bench, double a); -void ArrayVersion_misaligned(BenchmarkExt& bench, double a); -void ArrayVersion_index(BenchmarkExt& bench, double a); -void doTinyVectorVersion(BenchmarkExt& bench, double a); -void F77Version(BenchmarkExt& bench, double a); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double a); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& 
bench, double a); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop3: $y = $y + a*$x", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 2 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double a = 0.39123982498157938742; - - - ArrayVersion(bench, a); - ArrayVersion_unaligned(bench, a); - ArrayVersion_misaligned(bench, a); - ArrayVersion_index(bench, a); - //doTinyVectorVersion(bench, a); - F77Version(bench, a); -#ifdef FORTRAN_90 - F90Version(bench, a); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, a); -#endif - - if(runvector) - VectorVersion(bench, a); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop3.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector 
x(N); - initializeRandomDouble(x.data(), N); - Vector y(N); - initializeRandomDouble(y.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array y(N); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y(tensor::i) + a*x(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array 
xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+1); - Array y(yfill(Range(1,N))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array yfill(N+2); - Array y(yfill(Range(1,N+1-1))); - initializeRandomDouble(y.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray y(N); - initializeRandomDouble(y, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - y = y + a*x; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = 
bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop3_f77(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop3_f77overhead(N, x, y, a); - - bench.stopOverhead(); - - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double a) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* y = new double[N]; - initializeRandomDouble(y, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop3_f90(N, x, y, a); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop3_f90overhead(N, x, y, a); - - bench.stopOverhead(); - delete [] x; - delete [] y; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop36.cpp b/benchmarks/loop36.cpp deleted file mode 100644 index 1bf13c21..00000000 --- a/benchmarks/loop36.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop36 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop36_f77 loop36_f77_ - #define loop36_f77overhead loop36_f77overhead_ - #define loop36_f90 loop36_f90_ - #define loop36_f90overhead loop36_f90overhead_ 
-#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop36_f77 loop36_f77__ - #define loop36_f77overhead loop36_f77overhead__ - #define loop36_f90 loop36_f90__ - #define loop36_f90overhead loop36_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop36_f77 LOOP36_F77 - #define loop36_f77overhead LOOP36_F77OVERHEAD - #define loop36_f90 LOOP36_F90 - #define loop36_f90overhead LOOP36_F90OVERHEAD -#endif - -extern "C" { - void loop36_f77(const int& N, double* x, double* e); - void loop36_f77overhead(const int& N, double* x, double* e); - void loop36_f90(const int& N, double* x, double* e); - void loop36_f90overhead(const int& N, double* x, double* e); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop36: $x = exp($e)", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - 
ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop36.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector e(N); - initializeRandomDouble(e.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - 
- bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array e(N); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e(tensor::i));; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+1); - Array e(efill(Range(1,N))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+2); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array efill(N+2); - Array e(efill(Range(1,N+1-1))); - initializeRandomDouble(e.dataFirst(), N); - - - bench.start(); - for (long i=0; i < 
iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray e(N); - initializeRandomDouble(e, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = exp(e); - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* e = new double[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop36_f77(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop36_f77overhead(N, x, e); - - bench.stopOverhead(); - - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* e = new double[N]; - initializeRandomDouble(e, N); - - - bench.start(); - for 
(int iter=0; iter < iters; ++iter) - loop36_f90(N, x, e); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop36_f90overhead(N, x, e); - - bench.stopOverhead(); - delete [] x; - delete [] e; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop36f.f b/benchmarks/loop36f.f deleted file mode 100644 index 225e734c..00000000 --- a/benchmarks/loop36f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop36_F77(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - DO i=1,N - x(i) = exp(e(i)); - END DO - RETURN - END - - - SUBROUTINE loop36_F77Overhead(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - RETURN - END diff --git a/benchmarks/loop36f90.f90 b/benchmarks/loop36f90.f90 deleted file mode 100644 index af9c4b10..00000000 --- a/benchmarks/loop36f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop36_F90(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - x = exp(e) - RETURN - END - - - SUBROUTINE loop36_F90Overhead(N, x, e) - INTEGER i, N - REAL*8 x(N), e(N) - - RETURN - END diff --git a/benchmarks/loop3f.f b/benchmarks/loop3f.f deleted file mode 100644 index ca99521c..00000000 --- a/benchmarks/loop3f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop3_F77(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - DO i=1,N - y(i) = y(i) + a*x(i); - END DO - RETURN - END - - - SUBROUTINE loop3_F77Overhead(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - RETURN - END diff --git a/benchmarks/loop3f90.f90 b/benchmarks/loop3f90.f90 deleted file mode 100644 index 0a676018..00000000 --- a/benchmarks/loop3f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop3_F90(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - y = y + a*x - RETURN - END - - - SUBROUTINE loop3_F90Overhead(N, x, y, a) - INTEGER i, N - REAL*8 x(N), y(N), a - - RETURN - END diff --git a/benchmarks/loop4.cpp b/benchmarks/loop4.cpp deleted file mode 100644 index eac7b78d..00000000 --- a/benchmarks/loop4.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// In KAI 
C++ 3.2, restrict causes problems for copy propagation. -// Temporary fix: disable restrict - -#define BZ_DISABLE_RESTRICT - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop4_f77 loop4_f77_ - #define loop4_f90 loop4_f90_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop4_f77 loop4_f77__ - #define loop4_f90 loop4_f90__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop4_f77 LOOP4_F77 - #define loop4_f90 LOOP4_F90 -#endif - -extern "C" { - void loop4_f77(const int& niters, const int& N, double* x, double* a, double* b); - - void loop4_f90(const int& niters, const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -void F90Version(BenchmarkExt& bench); - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -int main() -{ -#ifdef BENCHMARK_VALARRAY - int numBenchmarks = 5; -#else - int numBenchmarks = 4; -#endif - - BenchmarkExt bench("$x=$a+$b", numBenchmarks); - - const int numSizes = 19; - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = pow(10.0, (i+1)/4.0); - iters[i] = 50000000L / parameters[i]; - if (iters[i] < 2) - iters[i] = 2; - flops[i] = 1 * parameters[i]; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); - - - VectorVersion(bench); - ArrayVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - bench.endBenchmarking(); - - 
bench.saveMatlabGraph("loop4.m"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -template -void initializeArray(T& array, int numElements) -{ - static Random rnd; - - for (size_t i=0; i < numElements; ++i) - array[i] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << "Vector: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Vector x(N); - initializeArray(x); - Vector a(N); - initializeArray(a); - Vector b(N); - initializeArray(b); - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=a+b; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << "Array: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array x(N); - initializeArray(x); - Array a(N); - initializeArray(a); - Array b(N); - initializeArray(b); - - bench.start(); - for (long i=0; i < iters; ++i) - { - x=a+b; - } - bench.stop(); - } - - bench.endImplementation(); -} - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - bench.start(); - loop4_f77(iters, N, x, a, b); - bench.stop(); - - delete [] x; - delete [] a; - delete [] b; - } - - bench.endImplementation(); -} - 
-#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - bench.start(); - loop4_f90(iters, N, x, a, b); - bench.stop(); - - delete [] x; - delete [] a; - delete [] b; - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/loop4f.f b/benchmarks/loop4f.f deleted file mode 100644 index 234d46af..00000000 --- a/benchmarks/loop4f.f +++ /dev/null @@ -1,12 +0,0 @@ - SUBROUTINE loop4_F77(niters, N, x, a, b) - INTEGER i, iter, niters, N - REAL*8 x(N), a(N), b(N) - - DO iter=1,niters - DO i=1,N - x(i)=a(i)+b(i) - END DO - END DO - - RETURN - END diff --git a/benchmarks/loop4f90.f90 b/benchmarks/loop4f90.f90 deleted file mode 100644 index b611208e..00000000 --- a/benchmarks/loop4f90.f90 +++ /dev/null @@ -1,10 +0,0 @@ - SUBROUTINE loop4_F90(niters, N, x, a, b) - INTEGER i, iter, niters, N - REAL*8 x(N), a(N), b(N) - - DO iter=1,niters - x=a+b - END DO - - RETURN - END diff --git a/benchmarks/loop5.cpp b/benchmarks/loop5.cpp deleted file mode 100644 index 6574baad..00000000 --- a/benchmarks/loop5.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// loop5 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop5_f77 loop5_f77_ - #define loop5_f77overhead loop5_f77overhead_ - #define loop5_f90 loop5_f90_ - #define loop5_f90overhead loop5_f90overhead_ 
-#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop5_f77 loop5_f77__ - #define loop5_f77overhead loop5_f77overhead__ - #define loop5_f90 loop5_f90__ - #define loop5_f90overhead loop5_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop5_f77 LOOP5_F77 - #define loop5_f77overhead LOOP5_F77OVERHEAD - #define loop5_f90 LOOP5_F90 - #define loop5_f90overhead LOOP5_F90OVERHEAD -#endif - -extern "C" { - void loop5_f77(const int& N, double* x, double* a, double* b); - void loop5_f77overhead(const int& N, double* x, double* a, double* b); - void loop5_f90(const int& N, double* x, double* a, double* b); - void loop5_f90overhead(const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop5: $x = $a+$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - 
- ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop5.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; 
- sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)+b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << 
": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a+b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop5_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop5_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] 
b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop5_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop5_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop5f.f b/benchmarks/loop5f.f deleted file mode 100644 index 8e22b5b2..00000000 --- a/benchmarks/loop5f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop5_F77(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)+b(i); - END DO - RETURN - END - - - SUBROUTINE loop5_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop5f90.f90 b/benchmarks/loop5f90.f90 deleted file mode 100644 index d770bac0..00000000 --- a/benchmarks/loop5f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop5_F90(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - x = a+b - RETURN - END - - - SUBROUTINE loop5_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - RETURN - END diff --git a/benchmarks/loop6.cpp b/benchmarks/loop6.cpp deleted file mode 100644 index 956f3f55..00000000 --- a/benchmarks/loop6.cpp +++ /dev/null @@ -1,460 +0,0 @@ - -// loop6 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY 
-#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop6_f77 loop6_f77_ - #define loop6_f77overhead loop6_f77overhead_ - #define loop6_f90 loop6_f90_ - #define loop6_f90overhead loop6_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop6_f77 loop6_f77__ - #define loop6_f77overhead loop6_f77overhead__ - #define loop6_f90 loop6_f90__ - #define loop6_f90overhead loop6_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop6_f77 LOOP6_F77 - #define loop6_f77overhead LOOP6_F77OVERHEAD - #define loop6_f90 LOOP6_F90 - #define loop6_f90overhead LOOP6_F90OVERHEAD -#endif - -extern "C" { - void loop6_f77(const int& N, double* x, double* a, double* b); - void loop6_f77overhead(const int& N, double* x, double* a, double* b); - void loop6_f90(const int& N, double* x, double* a, double* b); - void loop6_f90overhead(const int& N, double* x, double* a, double* b); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop6: $x = $a*$b", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - 
parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop6.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector x(N); - initializeRandomDouble(x.data(), N); - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector b(N); - initializeRandomDouble(b.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - 
long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array b(N); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a(tensor::i)*b(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+1); - Array b(bfill(Range(1,N))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - 
sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array xfill(N+3); - Array x(xfill(Range(0,N+0-1))); - initializeRandomDouble(x.dataFirst(), N); - - Array afill(N+3); - Array a(afill(Range(1,N+1-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array bfill(N+3); - Array b(bfill(Range(2,N+2-1))); - initializeRandomDouble(b.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray x(N); - initializeRandomDouble(x, N); - valarray a(N); - initializeRandomDouble(a, N); - valarray b(N); - initializeRandomDouble(b, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = a*b; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - 
initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop6_f77(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop6_f77overhead(N, x, a, b); - - bench.stopOverhead(); - - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* x = new double[N]; - initializeRandomDouble(x, N); - double* a = new double[N]; - initializeRandomDouble(a, N); - double* b = new double[N]; - initializeRandomDouble(b, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop6_f90(N, x, a, b); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop6_f90overhead(N, x, a, b); - - bench.stopOverhead(); - delete [] x; - delete [] a; - delete [] b; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop6f.f b/benchmarks/loop6f.f deleted file mode 100644 index 306ac241..00000000 --- a/benchmarks/loop6f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop6_F77(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - DO i=1,N - x(i) = a(i)*b(i); - END DO - RETURN - END - - - SUBROUTINE loop6_F77Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - RETURN - END diff --git a/benchmarks/loop6f90.f90 b/benchmarks/loop6f90.f90 deleted file mode 100644 index 0b0800cc..00000000 --- a/benchmarks/loop6f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop6_F90(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - x = a*b - RETURN - END - - - SUBROUTINE loop6_F90Overhead(N, x, a, b) - INTEGER i, N - REAL*8 x(N), a(N), b(N) - - RETURN - END 
diff --git a/benchmarks/loop8.cpp b/benchmarks/loop8.cpp deleted file mode 100644 index 3c735f9e..00000000 --- a/benchmarks/loop8.cpp +++ /dev/null @@ -1,439 +0,0 @@ - -// loop8 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop8_f77 loop8_f77_ - #define loop8_f77overhead loop8_f77overhead_ - #define loop8_f90 loop8_f90_ - #define loop8_f90overhead loop8_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop8_f77 loop8_f77__ - #define loop8_f77overhead loop8_f77overhead__ - #define loop8_f90 loop8_f90__ - #define loop8_f90overhead loop8_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop8_f77 LOOP8_F77 - #define loop8_f77overhead LOOP8_F77OVERHEAD - #define loop8_f90 LOOP8_F90 - #define loop8_f90overhead LOOP8_F90OVERHEAD -#endif - -extern "C" { - void loop8_f77(const int& N, double* a, double* x, const double& u); - void loop8_f77overhead(const int& N, double* a, double* x, const double& u); - void loop8_f90(const int& N, double* a, double* x, const double& u); - void loop8_f90overhead(const int& N, double* a, double* x, const double& u); - -} - -void VectorVersion(BenchmarkExt& bench, double u); -void ArrayVersion(BenchmarkExt& bench, double u); -void ArrayVersion_unaligned(BenchmarkExt& bench, double u); -void ArrayVersion_misaligned(BenchmarkExt& bench, double u); -void ArrayVersion_index(BenchmarkExt& bench, double u); -void doTinyVectorVersion(BenchmarkExt& bench, double u); -void F77Version(BenchmarkExt& bench, double u); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u); -#endif - 
-const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop8: $x = u/$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - double u = 0.39123982498157938742; - - - ArrayVersion(bench, u); - ArrayVersion_unaligned(bench, u); - ArrayVersion_misaligned(bench, u); - ArrayVersion_index(bench, u); - //doTinyVectorVersion(bench, u); - F77Version(bench, u); -#ifdef FORTRAN_90 - F90Version(bench, u); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench, u); -#endif - - if(runvector) - VectorVersion(bench, u); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop8.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - 
initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array 
a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = u/a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << 
bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop8_f77(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop8_f77overhead(N, a, x, u); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench, double u) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop8_f90(N, a, x, u); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop8_f90overhead(N, a, x, u); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop8f.f b/benchmarks/loop8f.f deleted file mode 100644 index 2dca941a..00000000 --- a/benchmarks/loop8f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop8_F77(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - - DO i=1,N - x(i) = u/a(i); - END DO - RETURN - END - - - SUBROUTINE loop8_F77Overhead(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - RETURN - END diff --git a/benchmarks/loop8f90.f90 b/benchmarks/loop8f90.f90 deleted file mode 100644 index 1c28528d..00000000 --- a/benchmarks/loop8f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop8_F90(N, a, x, u) - INTEGER i, N - REAL*8 a(N), x(N), u - - x = u/a - RETURN - END - - - SUBROUTINE loop8_F90Overhead(N, a, x, u) - INTEGER i, N - REAL*8 
a(N), x(N), u - - RETURN - END diff --git a/benchmarks/loop9.cpp b/benchmarks/loop9.cpp deleted file mode 100644 index c1774656..00000000 --- a/benchmarks/loop9.cpp +++ /dev/null @@ -1,438 +0,0 @@ - -// loop9 generated by makeloops.py Thu Jun 30 16:44:56 2011 - -#include -#include -#include -#include - -#ifdef BZ_HAVE_VALARRAY - #define BENCHMARK_VALARRAY -#endif - -#ifdef BENCHMARK_VALARRAY -#include -#endif - -namespace blitz { -extern void sink(); -} - -using namespace blitz; -using namespace std; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define loop9_f77 loop9_f77_ - #define loop9_f77overhead loop9_f77overhead_ - #define loop9_f90 loop9_f90_ - #define loop9_f90overhead loop9_f90overhead_ -#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES) - #define loop9_f77 loop9_f77__ - #define loop9_f77overhead loop9_f77overhead__ - #define loop9_f90 loop9_f90__ - #define loop9_f90overhead loop9_f90overhead__ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define loop9_f77 LOOP9_F77 - #define loop9_f77overhead LOOP9_F77OVERHEAD - #define loop9_f90 LOOP9_F90 - #define loop9_f90overhead LOOP9_F90OVERHEAD -#endif - -extern "C" { - void loop9_f77(const int& N, double* a, double* x); - void loop9_f77overhead(const int& N, double* a, double* x); - void loop9_f90(const int& N, double* a, double* x); - void loop9_f90overhead(const int& N, double* a, double* x); - -} - -void VectorVersion(BenchmarkExt& bench); -void ArrayVersion(BenchmarkExt& bench); -void ArrayVersion_unaligned(BenchmarkExt& bench); -void ArrayVersion_misaligned(BenchmarkExt& bench); -void ArrayVersion_index(BenchmarkExt& bench); -void doTinyVectorVersion(BenchmarkExt& bench); -void F77Version(BenchmarkExt& bench); -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench); -#endif -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench); -#endif - -const int numSizes = 80; -const bool runvector=false; // no point as long as Vector is Array<1> - -int main() -{ - int 
numBenchmarks = 5; - if (runvector) numBenchmarks++; -#ifdef BENCHMARK_VALARRAY - numBenchmarks++; -#endif -#ifdef FORTRAN_90 - numBenchmarks++; -#endif - - BenchmarkExt bench("loop9: $x = $x+$a", numBenchmarks); - - bench.setNumParameters(numSizes); - - Array parameters(numSizes); - Array iters(numSizes); - Array flops(numSizes); - - parameters=pow(pow(2.,0.25),tensor::i)+tensor::i; - flops = 1 * parameters; - iters = 100000000L / flops; - iters = where(iters<2, 2, iters); - cout << iters << endl; - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - bench.setDependentVariable("flops"); - bench.beginBenchmarking(); - - - - ArrayVersion(bench); - ArrayVersion_unaligned(bench); - ArrayVersion_misaligned(bench); - ArrayVersion_index(bench); - //doTinyVectorVersion(bench); - F77Version(bench); -#ifdef FORTRAN_90 - F90Version(bench); -#endif -#ifdef BENCHMARK_VALARRAY - ValarrayVersion(bench); -#endif - - if(runvector) - VectorVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("loop9.m"); - return 0; -} - -template -void initializeRandomDouble(T* data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -template -void initializeRandomDouble(valarray& data, int numElements, int stride = 1) -{ - ranlib::Uniform rnd; - - for (int i=0; i < numElements; ++i) - data[size_t(i*stride)] = rnd.random(); -} - -void VectorVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Vector"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Vector a(N); - initializeRandomDouble(a.data(), N); - Vector x(N); - initializeRandomDouble(x.data(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - 
for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Array"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - - void ArrayVersion_index(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (indexexpr.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - Array a(N); - initializeRandomDouble(a.dataFirst(), N); - Array x(N); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x(tensor::i)+a(tensor::i);; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_unaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (unal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+1); - Array a(afill(Range(1,N))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+1); - Array x(xfill(Range(1,N))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - 
bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - - void ArrayVersion_misaligned(BenchmarkExt& bench) -{ - bench.beginImplementation("Array (misal.)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - long iters = bench.getIterations(); - - cout << bench.currentImplementation() << ": N = " << N << endl; - - - Array afill(N+2); - Array a(afill(Range(0,N+0-1))); - initializeRandomDouble(a.dataFirst(), N); - - Array xfill(N+2); - Array x(xfill(Range(1,N+1-1))); - initializeRandomDouble(x.dataFirst(), N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - - bench.stopOverhead(); - } - - bench.endImplementation(); -} - -#ifdef BENCHMARK_VALARRAY -void ValarrayVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("valarray"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - long iters = bench.getIterations(); - - valarray a(N); - initializeRandomDouble(a, N); - valarray x(N); - initializeRandomDouble(x, N); - - - bench.start(); - for (long i=0; i < iters; ++i) - { - x = x+a; - sink(); - } - bench.stop(); - - bench.startOverhead(); - for (long i=0; i < iters; ++i) { - sink(); - } - bench.stopOverhead(); - } - - bench.endImplementation(); -} -#endif - -void F77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; 
++iter) - loop9_f77(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop9_f77overhead(N, a, x); - - bench.stopOverhead(); - - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void F90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - cout << bench.currentImplementation() << ": N = " << N << endl; - - int iters = bench.getIterations(); - - double* a = new double[N]; - initializeRandomDouble(a, N); - double* x = new double[N]; - initializeRandomDouble(x, N); - - - bench.start(); - for (int iter=0; iter < iters; ++iter) - loop9_f90(N, a, x); - bench.stop(); - - bench.startOverhead(); - for (int iter=0; iter < iters; ++iter) - loop9_f90overhead(N, a, x); - - bench.stopOverhead(); - delete [] a; - delete [] x; - - } - - bench.endImplementation(); -} -#endif - diff --git a/benchmarks/loop9f.f b/benchmarks/loop9f.f deleted file mode 100644 index 9d03021a..00000000 --- a/benchmarks/loop9f.f +++ /dev/null @@ -1,17 +0,0 @@ - - SUBROUTINE loop9_F77(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - DO i=1,N - x(i) = x(i)+a(i); - END DO - RETURN - END - - - SUBROUTINE loop9_F77Overhead(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - RETURN - END diff --git a/benchmarks/loop9f90.f90 b/benchmarks/loop9f90.f90 deleted file mode 100644 index 1c435b66..00000000 --- a/benchmarks/loop9f90.f90 +++ /dev/null @@ -1,16 +0,0 @@ - - SUBROUTINE loop9_F90(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - x = x+a - RETURN - END - - - SUBROUTINE loop9_F90Overhead(N, a, x) - INTEGER i, N - REAL*8 a(N), x(N) - - RETURN - END diff --git a/benchmarks/loops.data b/benchmarks/loops.data deleted file mode 100644 index 5948e033..00000000 --- a/benchmarks/loops.data +++ /dev/null @@ -1,147 +0,0 @@ -begin loop1 -array x y -flops 1 -loop $x = sqrt($y) -end - -begin loop2 -array x y -scalar u -flops 1 -loop $x=$y/$u 
-end - -begin loop3 -array x y -scalar a -flops 2 -loop $y=$y+$a*$x -end - -begin loop5 -array x a b -flops 1 -loop $x=$a+$b -end - -begin loop6 -array x a b -flops 1 -loop $x=$a*$b -end - -begin loop8 -array a x -scalar u -flops 1 -loop $x=u/$a -end - -begin loop9 -array a x -flops 1 -loop $x=$x+$a -end - -begin loop10 -array x a b c -scalar u -flops 3 -loop $x=u+$a+$b+$c -end - -begin loop11 -array x a b c d -flops 3 -loop $x=$a+$b+$c+$d -end - -begin loop12 -array y a x b c d -scalar u -flops 4 -loop $y = u + $a; $x = $a + $b + $c + $d -end - -begin loop13 -array x a b c d y -scalar u -loop $x=$a+$b+$c+$d; $y=u+$d -flops 4 -end - -begin loop14 -array x a b y -loop $x=$a+$b; $y=$a-$b -flops 2 -end - -begin loop15 -array x c a b -flops 2 -loop $x=$c+$a*$b -flops 3 -end - -begin loop16 -array x a b c y -scalar u -loop $x=$a+$b+$c; $y=$x+$c+u -flops 4 -end - -begin loop17 -array x a b c d -flops 3 -loop $x=($a+$b)*($c+$d) -end - -begin loop18 -array x a b -scalar u v -flops 3 -loop $x = (u + $a) * (v + $b) -end - -begin loop19 -array x a b y -scalar u v -flops 2 -loop $x=u*$a; $y=v*$b -end - -begin loop21 -array x a b c d -flops 3 -loop $x = $a * $b + $c * $d -end - -begin loop22 -array x a b c d -flops 4 -loop $x = $x + $a * $b + $c * $d -end - -begin loop23 -array x a b c d y -flops 4 -loop $x=$a*$b+$c*$d; $y=$b+$d -end - -begin loop24 -array x a b c d y -flops 6 -loop $x=$a*$c-$b*$d; $y=$a*$d+$b*$c -end - -begin loop25 -array x a b c y -scalar u v w -loop $x=u*$b; $y=v*$b+w*$a+u*$c -end - -begin loop36 -array x e -flops 1 -loop $x = exp($e) -end diff --git a/benchmarks/loopstruct.cpp b/benchmarks/loopstruct.cpp deleted file mode 100644 index 705b1145..00000000 --- a/benchmarks/loopstruct.cpp +++ /dev/null @@ -1,172 +0,0 @@ -// CC -64 -LANG:std -LANG:restrict -Ofast -PHASE:clist -IPA=off -IPA:INLINE=off -// -// When compiled with the above options, these are the results on convex: -// func1: 34.6484 -// func2: 24.6603 -// func3: 17.2822 -// -// func1 is a simple 
C-style loop. -// func2 has the pointers stuck inside a struct. Prefetching no longer occurs. -// func3 has the pointers inside "iterator" structs, and a read is done -// using an inline operator(). This somehow results in the loop -// being unrolled only twice, instead of 4 times. - -#include -#include - -// Paste the Timer class in here so you don't have to have Blitz++ - -class Timer { - -public: - Timer() - { - state_ = uninitialized; - } - - void start() - { - state_ = running; - t1_ = systemTime(); - } - - void stop() - { - t2_ = systemTime(); - state_ = stopped; - } - -/* Compaq cxx compiler in ansi mode cannot print out long double type! */ -#if defined(__DECCXX) - double elapsedSeconds() -#else - long double elapsedSeconds() -#endif - { - return t2_ - t1_; - } - -private: - Timer(Timer&) { } - void operator=(Timer&) { } - - long double systemTime() - { - getrusage(RUSAGE_SELF, &resourceUsage_); - double seconds = resourceUsage_.ru_utime.tv_sec - + resourceUsage_.ru_stime.tv_sec; - double micros = resourceUsage_.ru_utime.tv_usec - + resourceUsage_.ru_stime.tv_usec; - return seconds + micros/1.0e6; - } - - enum { uninitialized, running, stopped } state_; - - struct rusage resourceUsage_; - long double t1_, t2_; -}; - - -// func1: Simple version - -void func1(double* restrict x, double* restrict y, double a, int N) -{ - for (int i=0; i < N; ++i) - y[i] += a*x[i]; -} - - -// func2: With pointers inside a struct - -struct A { - double* restrict x; - double* restrict y; - double a; - int N; -}; - -void func2(A& z) -{ - for (int i=0; i < z.N; ++i) - z.y[i] += z.a * z.x[i]; -} - - -// func3: with very simple "iterators" (the B struct). 
- -struct B { - int q; - double* restrict data; - double operator()(int i) - { return data[i]; } -}; - -struct C { - B x; - B y; - double a; - int N; -}; - -void func3(C& z) -{ - for (int i=0; i < z.N; ++i) - z.y.data[i] += z.a * z.x(i); -} - - -// Initialize array - -void init(double* x, int N) -{ - for (int i=0; i < N; ++i) - x[i] = 1.0; -} - -int main() -{ - Timer timer; - - int N = 1000000; - int iters = 20; - double Mflops = N * iters * 2 / 1000000.; - - double* x = new double[N]; - double* y = new double[N]; - double a = .14989182; - init(x,N); - init(y,N); - - timer.start(); - for (int i=0; i < iters; ++i) - func1(x,y,a,N); - timer.stop(); - - cout << "func1: " << Mflops/timer.elapsedSeconds() << endl; - - timer.start(); - A z; - z.x = x; - z.y = y; - z.a = a; - z.N = N; - for (int i=0; i < iters; ++i) - func2(z); - timer.stop(); - - cout << "func2: " << Mflops/timer.elapsedSeconds() << endl; - - timer.start(); - { - C z; - z.x.data = x; - z.y.data = y; - z.a = a; - z.N = N; - for (int i=0; i < iters; ++i) - func3(z); - } - timer.stop(); - cout << "func3: " << Mflops/timer.elapsedSeconds() << endl; -} - diff --git a/benchmarks/looptest.cpp b/benchmarks/looptest.cpp deleted file mode 100644 index fcd23ffe..00000000 --- a/benchmarks/looptest.cpp +++ /dev/null @@ -1,707 +0,0 @@ -#include - -using namespace blitz; - -void initialize(double& c, double& d, double* a, double* b, int& N); - -template -void sink(T&) -{ } - -void benchmarkLoops(int, long); - -int main() -{ - cout << "This program measures the performance of DAXPY operations" - << endl << "using various C loop structures." 
<< endl << endl; - - cout << endl << "In-cache:" << endl; - - benchmarkLoops(400,50000); - - cout << endl << "Out of cache:" << endl; - - benchmarkLoops(1000000,50); - - return 0; -} - -void benchmarkLoops(int N, long iterations) -{ - double* _bz_restrict a = new double[N]; - double* _bz_restrict b = new double[N]; - double c, d; - double t1, t2; - - initialize(c, d, a, b, N); - - double mflops = iterations * 4.0 * N / (1024.0 * 1024.0); - - Timer timer; - - cout << "Mflops/s Description" << endl; - - long iter; - int i; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - a[i] += c * b[i]; - - for (i=0; i < N; ++i) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - a[i] = a[i] + c * b[i]; - - for (i=0; i < N; ++i) - a[i] = a[i] + d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, no +=" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=N-1; i >= 0; --i) - a[i] += c * b[i]; - - for (i=N-1; i >= 0; --i) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, backwards loops" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - a[i+2] += c2 
* b[i+2]; - a[i+3] += c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - double t1 = c2 * b[i]; - double t2 = c2 * b[i+1]; - double t3 = c2 * b[i+2]; - double t4 = c2 * b[i+3]; - - a[i] += t1; - a[i+1] += t2; - a[i+2] += t3; - a[i+3] += t4; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - double t1 = d2 * b[i]; - double t2 = d2 * b[i+1]; - double t3 = d2 * b[i+2]; - double t4 = d2 * b[i+3]; - - a[i] += t1; - a[i+1] += t2; - a[i+2] += t3; - a[i+3] += t4; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << "\t\t4 read then 4 write" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - a[i] = a[i] + c2 * b[i]; - a[i+1] = a[i+1] + c2 * b[i+1]; - a[i+2] = a[i+2] + c2 * b[i+2]; - a[i+3] = a[i+3] + c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - a[i] = a[i] + d2 * b[i]; - a[i+1] = a[i+1] + d2 * b[i+1]; - a[i+2] = a[i+2] + d2 * b[i+2]; - a[i+3] = a[i+3] + d2 * b[i+3]; - } - } - timer.stop(); - - 
cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << " no += " - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - int i1 = i + 1; - a[i] += c2 * b[i]; - int i2 = i + 2; - a[i1] += c2 * b[i1]; - int i3 = i + 3; - a[i2] += c2 * b[i2]; - a[i3] += c2 * b[i3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - int i1 = i + 1; - a[i] += d2 * b[i]; - int i2 = i + 2; - a[i1] += d2 * b[i1]; - int i3 = i + 3; - a[i2] += d2 * b[i2]; - a[i3] += d2 * b[i3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << endl << " CSE for index offsets" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - double* pa = a+n1; - double* pb = b+n1; - - int top = N - n1 - 4; - - for (i=top; i >= 0; i -= 4) - { - pa[i] += c2 * pb[i]; - pa[i+1] += c2 * pb[i+1]; - pa[i+2] += c2 * pb[i+2]; - pa[i+3] += c2 * pb[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - pa = a+n2; - pb = b+n2; - - top = N - n2 - 4; - for (i=top; i >= 0; i -= 4) - { - pa[i] += d2 * pb[i]; - pa[i+1] += d2 * pb[i+1]; - pa[i+2] += d2 * pb[i+2]; - pa[i+3] += d2 * pb[i+3]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << " backwards" - << endl; - - 
/*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 7; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 8) - { - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - a[i+2] += c2 * b[i+2]; - a[i+3] += c2 * b[i+3]; - a[i+4] += c2 * b[i+4]; - a[i+5] += c2 * b[i+5]; - a[i+6] += c2 * b[i+6]; - a[i+7] += c2 * b[i+7]; - } - - double d2 = d; - int n2 = N & 7; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 8) - { - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - a[i+4] += d2 * b[i+4]; - a[i+5] += d2 * b[i+5]; - a[i+6] += d2 * b[i+6]; - a[i+7] += d2 * b[i+7]; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=8, unit stride, constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - for (i=0; i < N; ++i) - a[i] += c2 * b[i]; - - double d2 = d; - for (i=0; i < N; ++i) - a[i] += d2 * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, unit stride, constants into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); // Prevent copy propagation - - for (i=0; i < N; i += stride) - a[i] += c * b[i]; - - for (i=0; i < N; i += stride) - a[i] += d * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, non-unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); // 
Prevent copy propagation - - double c2 = c; - for (i=0; i < N; i += stride) - a[i] += c2 * b[i]; - - double d2 = d; - for (i=0; i < N; i += stride) - a[i] += d2 * b[i]; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, indirection, non-unit stride, constants " - "loaded into temps" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N; - while (pa1 != paend1) - { - *pa1 += c * (*pb1); - ++pa1; - ++pb1; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N; - while (pa2 != paend2) - { - *pa2 += d * (*pb2); - ++pa2; - ++pb2; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N; - double c2 = c; - while (pa1 != paend1) - { - *pa1 += c2 * (*pb1); - ++pa1; - ++pb1; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N; - double d2 = d; - while (pa2 != paend2) - { - *pa2 += d2 * (*pb2); - ++pa2; - ++pb2; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unit stride, " << endl - << " constants loaded into temps" - << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); - - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N * stride; - while (pa1 
!= paend1) - { - *pa1 += c * (*pb1); - pa1 += stride; - pb1 += stride; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N * stride; - while (pa2 != paend2) - { - *pa2 += d * (*pb2); - pa2 += stride; - pb2 += stride; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, non-unit stride" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - int stride = 1; - sink(stride); - - double * _bz_restrict pa1 = a, - * _bz_restrict pb1 = b; - double * _bz_restrict paend1 = a + N * stride; - double c2 = c; - int n1 = N & 3; - - for (i=0; i < n1; ++i) - { - *pa1 += c2 * (*pb1); - pa1 += stride; - pb1 += stride; - } - - while (pa1 != paend1) - { - pa1[0] += c2 * pb1[0]; - pa1[1] += c2 * pb1[1]; - pa1[2] += c2 * pb1[2]; - pa1[3] += c2 * pb1[3]; - pa1 += 4 * stride; - pb1 += 4 * stride; - } - - double * _bz_restrict pa2 = a, - * _bz_restrict pb2 = b; - double * _bz_restrict paend2 = a + N * stride; - double d2 = d; - int n2 = N & 3; - - for (i=0; i < n2; ++i) - { - *pa2 += d2 * (*pb2); - pa2 += stride; - pb2 += stride; - } - - while (pa2 != paend2) - { - pa2[0] += d2 * pb2[0]; - pa2[1] += d2 * pb2[1]; - pa2[2] += d2 * pb2[2]; - pa2[3] += d2 * pb2[3]; - pa2 += 4 * stride; - pb2 += 4 * stride; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " while, pointer increment, unroll=4, non-unit stride," << endl - << " constants loaded into temps" << endl; - - /*********************************************************************/ - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - a[i] += c2 * b[i]; - - for (; i < N; i += 4) - { - t1 = a[i+4]; - a[i] += c2 * b[i]; - a[i+1] += c2 * b[i+1]; - t2 = b[i+4]; - a[i+2] += c2 * b[i+2]; - a[i+3] 
+= c2 * b[i+3]; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - a[i] += d2 * b[i]; - - for (; i < N; i += 4) - { - t1 = a[i+4]; - a[i] += d2 * b[i]; - a[i+1] += d2 * b[i+1]; - t2 = b[i+4]; - a[i+2] += d2 * b[i+2]; - a[i+3] += d2 * b[i+3]; - } - } - timer.stop(); - - - sink(t1); - sink(t2); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, constants loaded into temps," - << " prefetching" - << endl; - - /********************************************************************/ - - struct vectorPair { - double a; - double b; - }; - vectorPair* v = new vectorPair[N]; - int N2 = 2*N; - initialize(c, d, (double*)v, (double*)v, N2); - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - for (i=0; i < N; ++i) - v[i].a += c * v[i].b; - - for (i=0; i < N; ++i) - v[i].a += d * v[i].b; - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " interlaced, for, indirection, unit stride" << endl; - - /*********************************************************************/ - - initialize(c, d, (double*)v, (double*)v, N2); - - timer.start(); - for (iter=0; iter < iterations; ++iter) - { - double c2 = c; - - int n1 = N & 3; - for (i=0; i < n1; ++i) - v[i].a += c2 * v[i].b; - - for (; i < N; i += 4) - { - v[i].a += c2 * v[i].b; - v[i+1].a += c2 * v[i+1].b; - v[i+2].a += c2 * v[i+2].b; - v[i+3].a += c2 * v[i+3].b; - } - - double d2 = d; - int n2 = N & 3; - for (i=0; i < n2; ++i) - v[i].a += d2 * v[i].b; - - for (; i < N; i += 4) - { - v[i].a += d2 * v[i].b; - v[i+1].a += d2 * v[i+1].b; - v[i+2].a += d2 * v[i+2].b; - v[i+3].a += d2 * v[i+3].b; - } - } - timer.stop(); - - cout << setw(7) << setprecision(5) << (mflops/timer.elapsedSeconds()) - << " for, unroll=4, unit stride, interlaced, " << endl - << "\t\tconstants loaded into temps" - << endl; - - delete [] v; - - /********************************************************************/ - - delete [] a; - 
delete [] b; -} - -void initialize(double& c, double& d, double* a, double* b, int& N) -{ - for (int i=0; i < N; ++i) - { - a[i] = 1/7.; - b[i] = 1/3.; - } - c = 0.398192839842; - d = - c; -} - diff --git a/benchmarks/makelogo.cpp b/benchmarks/makelogo.cpp deleted file mode 100644 index 73f9f87c..00000000 --- a/benchmarks/makelogo.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include - -using namespace blitz; - -void makeLogo(); - -int main() -{ - makeLogo(); - return 0; -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N, int M); - -void snapshot(const Array& P, const Array& c); - -void makeLogo() -{ - const int N = 300, M = 900; - int niters = 3000; - - Array P1, P2, P3, c; - allocateArrays(shape(N,M), P1, P2, P3, c); - Range I(1,N-2), J(1,M-2); - - setInitialConditions(c, P1, P2, P3, N, M); - - for (int iter=0; iter < niters; ++iter) - { - P3(I,J) = (2-4*c(I,J)) * P2(I,J) - + c(I,J)*(P2(I-1,J) + P2(I+1,J) + P2(I,J-1) + P2(I,J+1)) - - P1(I,J); - - cycleArrays(P1,P2,P3); - - snapshot(P2, c); - } - -} - -void setInitialConditions(Array& c, Array& P1, - Array& P2, Array& P3, int N, int M) -{ - // Set the velocity field - c = 0.3; - - ifstream ifs("blitz3.pgm"); - char tmpBuf[128]; - int pixel; - ifs.getline(tmpBuf, 128); - ifs.getline(tmpBuf, 128); - ifs.getline(tmpBuf, 128); - - for (int pi=0; pi < 199; ++pi) - { - for (int pj=0; pj < 798; ++pj) - { - ifs >> pixel; - if (pixel) - c(pi+50,pj+56) = 0.02; - } - } - - // Initial pressure distribution: gaussian pulse - using namespace blitz::tensor; - int cr = N/6-1; -// int cc = 7.0*M/8.0-1; - float s2 = 64.0 * 9.0 / pow2(N/2.0); - P1 = 0.0; -// P2 = exp(-(pow2(i-cr)+pow2(j-cc)) * s2); - P2 = exp(-(pow2(i-cr)) * s2); - - P3 = 0.0; -} - - -void snapshot(const Array& P, const Array& c) -{ - static int count = 0, snapshotNum = 0; - if (++count < 50) - return; - - count = 0; - ++snapshotNum; - char filename[128]; - sprintf(filename, "logo%03d.m", snapshotNum); - - ofstream ofs(filename); - 
int N = P.length(firstDim); - int M = P.length(secondDim); - - float Pmin = -0.6; - float PScale = 1.0/1.2; - float VScale = 1.0; - - ofs << "P" << snapshotNum << " = [ "; - for (int i=0; i < N; ++i) - { - for (int j=0; j < M; ++j) - { - float value1 = (P(i,j)-Pmin)*PScale; - float value2 = c(i,j)*VScale; - int r1 = value1 * 4096; - int r2 = value2 * 4096; - ofs << r1 << " " << r2 << " "; - } - if (i < N-1) - ofs << ";" << endl; - } - ofs << "];" << endl; -} - diff --git a/benchmarks/makeloops.cpp b/benchmarks/makeloops.cpp deleted file mode 100644 index cab22342..00000000 --- a/benchmarks/makeloops.cpp +++ /dev/null @@ -1,791 +0,0 @@ - - -#include -#include -#include -#include -#include -#include -#include -#include - -class loop -{ - -public: - loop() - { - reset(); - } - - void reset(); - - void parseLoop(istream& is); - - int numArrays() const - { - return numArrays_; - } - - char arrayName(int i) const - { - return arrays_[i]; - } - - int numScalars() const - { - return numScalars_; - } - - char scalarName(int i) const - { - return scalars_[i]; - } - - const char* loopName() const - { - return loopName_; - } - - const char* loopBuffer() const - { - return loopBuffer_; - } - - int flops() const - { - return flops_; - } - - int isArray(char c) const - { - for (int i=0; i < numArrays_; ++i) - if (arrays_[i] == c) - return 1; - return 0; - } - - int isScalar(char c) const - { - for (int i=0; i < numScalars_; ++i) - if (scalars_[i] == c) - return 1; - return 0; - } - -private: - enum { maxArrays = 20, maxScalars = 20, bufLen = 128 }; - - char arrays_[maxArrays]; - int numArrays_; - char scalars_[maxScalars]; - int numScalars_; - char loopBuffer_[bufLen]; - char loopName_[bufLen]; - int flops_; -}; - -void loop::reset() -{ - numArrays_ = 0; - numScalars_ = 0; - loopBuffer_[0] = '\0'; - loopName_[0] = '\0'; - flops_ = 0; -} - -void loop::parseLoop(istream& is) -{ - const int bufLen = 128; - char buffer[bufLen]; - - const char* whitespace = " \t"; - - reset(); - - 
while (!is.eof()) { - is.getline(buffer, bufLen); - char* token = strtok(buffer, whitespace); - if (!token) - continue; - - if (!strcmp(token, "begin")) { - token = strtok(0, whitespace); - strcpy(loopName_, token); - cout << "Creating loop: " << loopName_ << endl; - } else if (!strcmp(token, "end")) - return; - else if (!strcmp(token, "array")) { - while (token = strtok(0, whitespace)) { - arrays_[numArrays_++] = token[0]; - cout << "Array: " << token[0] << endl; - } - } else if (!strcmp(token, "scalar")) { - while (token = strtok(0, whitespace)) { - scalars_[numScalars_++] = token[0]; - cout << "Scalar: " << token[0] << endl; - } - } else if (!strcmp(token, "flops")) { - token = strtok(0, whitespace); - flops_ = atoi(token); - cout << "Flops: " << flops_ << endl; - } else if (!strcmp(token, "loop")) { - loopBuffer_[0] = '\0'; - while (token = strtok(0, whitespace)) - strcat(loopBuffer_, token); - cout << "Loop: " << loopBuffer_ << endl; - } - } -} - -void fortranVersion(loop& lp) -{ - const char* numtype = "REAL*8"; - - char filename[128]; - sprintf(filename, "%sf.f", lp.loopName()); - ofstream ofs(filename); - - ofs << " SUBROUTINE " << lp.loopName() << "_F77(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " DO i=1,N" << endl - << " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] == ';') - ofs << endl << " "; - else if (loopBuffer[i] != '$') - ofs << loopBuffer[i]; - else { - ++i; - if (lp.isArray(loopBuffer[i])) - ofs << loopBuffer[i] << "(i)"; - else if (lp.isScalar(loopBuffer[i])) - ofs << 
loopBuffer[i]; - } - } - - ofs << endl - << " END DO" << endl; - - ofs << " RETURN" << endl - << " END" << endl; - - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F77Overhead(N"; - - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl - << " RETURN" << endl - << " END" << endl; -} - -void fortran90Version(loop& lp) -{ - const char* numtype = "REAL*8"; - - char filename[128]; - sprintf(filename, "%sf90.f90", lp.loopName()); - ofstream ofs(filename); - - ofs << " SUBROUTINE " << lp.loopName() << "_F90(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] == ';') - ofs << endl << " "; - else if (loopBuffer[i] != '$') - ofs << loopBuffer[i]; - } - - ofs << endl - << " RETURN" << endl - << " END" << endl; - - ofs << endl << endl - << " SUBROUTINE " << lp.loopName() << "_F90Overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - ofs << ")" << endl - << " INTEGER i, N" << endl - << " " << numtype << " " << lp.arrayName(0) << "(N)"; - - for (int i=1; i < lp.numArrays(); 
++i) - ofs << ", " << lp.arrayName(i) << "(N)"; - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", " << lp.scalarName(i); - - ofs << endl << endl - << " RETURN" << endl - << " END" << endl; -} - -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype); -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2); - -void cppVersion(loop& lp) -{ - const char* numtype = "double"; - - char filename[128]; - sprintf(filename, "%s.cpp", lp.loopName()); - ofstream ofs(filename); - - char capsLoopName[128]; - for (int i=0; i <= strlen(lp.loopName()); ++i) - capsLoopName[i] = toupper(lp.loopName()[i]); - - ofs << "// Generated code (makeloops.cpp) -- do not edit." << endl << endl - << "// In KAI C++ 3.2, restrict causes problems for copy propagation." 
- << endl << "// Temporary fix: disable restrict" << endl << endl - << "#define BZ_DISABLE_RESTRICT" << endl << endl - << - "#include \n" - "#include \n" - "#include \n" - "#include \n" - "\n" - "// Generated: " << __FILE__ << " " << __DATE__ << endl << endl << - "#ifdef BZ_HAVE_VALARRAY\n" - " #define BENCHMARK_VALARRAY\n" - "#endif\n\n" - "#ifdef BENCHMARK_VALARRAY\n" - "#include \n" - "#endif\n" - "\n" - "using namespace blitz;\n" - "\n" - "#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77_\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead_\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90_\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead_\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)\n" - " #define " << lp.loopName() << "_f77 " << lp.loopName() << "_f77__\n" - " #define " << lp.loopName() << "_f77overhead " << lp.loopName() << "_f77overhead__\n" - - " #define " << lp.loopName() << "_f90 " << lp.loopName() << "_f90__\n" - " #define " << lp.loopName() << "_f90overhead " << lp.loopName() << "_f90overhead__\n" - - "#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)\n" - " #define " << lp.loopName() << "_f77 " << capsLoopName << "_F77\n" - " #define " << lp.loopName() << "_f77overhead " << capsLoopName << "_F77OVERHEAD\n" - " #define " << lp.loopName() << "_f90 " << capsLoopName << "_F90\n" - " #define " << lp.loopName() << "_f90overhead " << capsLoopName << "_F90OVERHEAD\n" - "#endif\n" - "\n" - "extern \"C\" {" << endl; - - writeFortranDecl(ofs, "_f77", lp, numtype); - writeFortranDecl(ofs, "_f77overhead", lp, numtype); - writeFortranDecl(ofs, "_f90", lp, numtype); - writeFortranDecl(ofs, "_f90overhead", lp, numtype); - - ofs << "}" << endl << endl; - - // Create a string with a list of arguments for the scalars - ostrstream tmpbuf; - for (int i=0; i < lp.numScalars(); ++i) 
{ - tmpbuf << ", " << numtype << " " << lp.scalarName(i); - } - tmpbuf << '\0'; - const char* scalarArgs = tmpbuf.str(); - - ofs << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ");" << endl << "#endif" << endl << endl; - - ofs << "void sink() {}\n\n"; - - ofs << "int main()\n" - "{\n" - " int numBenchmarks = 5;\n" - "#ifndef BENCHMARK_VALARRAY\n" - " numBenchmarks--; // No valarray\n" - "#endif\n" - "#ifndef FORTRAN_90\n" - " numBenchmarks--; // No fortran 90\n" - "#endif\n" - - "\n" - " BenchmarkExt bench(\"" << lp.loopName() << ": " - << lp.loopBuffer() << "\", numBenchmarks);\n" - "\n" - " const int numSizes = 23;\n" - " bench.setNumParameters(numSizes);\n" - " bench.setRateDescription(\"Mflops/s\");\n" - "\n" - " Vector parameters(numSizes);\n" - " Vector iters(numSizes);\n" - " Vector flops(numSizes);\n" - "\n" - " for (int i=0; i < numSizes; ++i)\n" - " {\n" - " parameters[i] = (int)pow(10.0, (i+1)/4.0);\n" - " iters[i] = 10000000L / parameters[i];\n" - " if (iters[i] < 2)\n" - " iters[i] = 2;\n" - " flops[i] = " << lp.flops() << " * parameters[i];\n" - " }\n" - "\n" - " bench.setParameterVector(parameters);\n" - " bench.setIterations(iters);\n" - " bench.setOpsPerIteration(flops);\n" - "\n" - " bench.beginBenchmarking();" << endl << endl; - - // Create literals - for (int i=0; i < lp.numScalars(); ++i) { - ofs << " " << numtype << " " << lp.scalarName(i) - << " = 0.39123982498157938742;" << endl; - } - - ofs << endl; - - ofs.flush(); - - // Create a string with a list of arguments for the scalars - ostrstream tmpbuf2; - for (int i=0; i < 
lp.numScalars(); ++i) { - tmpbuf2 << ", " << lp.scalarName(i); - } - tmpbuf2 << '\0'; - char* scalarArgs2 = tmpbuf2.str(); - - ofs << " VectorVersion(bench" << scalarArgs2 << ");" << endl - << " ArrayVersion(bench" << scalarArgs2 << ");" << endl - << " F77Version(bench" << scalarArgs2 << ");" << endl - << "#ifdef FORTRAN_90" << endl - << " F90Version(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl - << "#ifdef BENCHMARK_VALARRAY" << endl - << " ValarrayVersion(bench" << scalarArgs2 << ");" << endl - << "#endif" << endl << endl << - " bench.endBenchmarking();\n" - "\n" - " bench.saveMatlabGraph(\"" << lp.loopName() << ".m\");\n" - "\n" - " return 0;\n" - "}\n\n" - "template\n" - "void initializeRandomDouble(T data, int numElements, int stride = 1)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (int i=0; i < numElements; ++i)\n" - " data[size_t(i*stride)] = rnd.random();\n" - "}\n" - "\n" - "template\n" - "void initializeArray(T& array, int numElements)\n" - "{\n" - " static Random rnd;\n" - "\n" - " for (size_t i=0; i < numElements; ++i)\n" - " array[i] = rnd.random();\n" - "}\n\n"; - - VectorVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - ArrayVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - ValarrayVersion(ofs, lp, numtype, scalarArgs, scalarArgs2); - F77Version(ofs, lp, numtype, scalarArgs, scalarArgs2); - F90Version(ofs, lp, numtype, scalarArgs, scalarArgs2); -} - -void writeFortranDecl(ofstream& ofs, const char* version, loop& lp, - const char* numtype) -{ - ofs << " void " << lp.loopName() << version - << "(const int& N"; - - for (int i=0; i < lp.numArrays(); ++i) - ofs << ", " << numtype << "* " << lp.arrayName(i); - for (int i=0; i < lp.numScalars(); ++i) - ofs << ", const " << numtype << "& " << lp.scalarName(i); - ofs << ");" << endl << endl; -} - -void VectorVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void VectorVersion(BenchmarkExt& bench" - << scalarArgs 
<< ")\n" - << - "{\n" - " bench.beginImplementation(\"Vector\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Vector: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " Vector<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".data(), N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; -} - -void ArrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void ArrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"Array\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"Array: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " Array<" << numtype << ", 1> " << lp.arrayName(i) - << "(N);" << endl - << " initializeRandomDouble(" << lp.arrayName(i) << ".dataFirst(), N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { - if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " 
bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n"; - os << - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl; -} - -void F77Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "void F77Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 77\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n\n" - " cout << \"Fortran 77: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77(N"; - - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f77overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n"; - - os << endl << - " bench.stopOverhead();\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " delete [] " << lp.arrayName(i) << ";" << endl; - } - - os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" << endl; - -} - -void F90Version(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "#ifdef FORTRAN_90" << endl - << "void F90Version(BenchmarkExt& bench" - << scalarArgs << ")\n" - "{\n" - " bench.beginImplementation(\"Fortran 90\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = 
bench.getParameter();\n\n" - " cout << \"Fortran 90: N = \" << N << endl;\n" - " cout.flush();\n\n" - " int iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " " << numtype << "* " << lp.arrayName(i) - << " = new " << numtype << "[N];" << endl - << " initializeRandomDouble(" << lp.arrayName(i) - << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90(N"; - - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (int iter=0; iter < iters; ++iter)\n" - " " << lp.loopName() << "_f90overhead(N"; - for (int i=0; i < lp.numArrays(); ++i) - os << ", " << lp.arrayName(i); - os << scalarArgs2 << ");\n"; - - os << endl << - " bench.stopOverhead();\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " delete [] " << lp.arrayName(i) << ";" << endl; - } - - os << " }\n" - "\n" - " bench.endImplementation();\n" - "}\n" - << "#endif\n" << endl; - -} - -void ValarrayVersion(ostream& os, loop& lp, const char* numtype, - const char* scalarArgs, const char* scalarArgs2) -{ - os << "#ifdef BENCHMARK_VALARRAY" << endl; - os << "void ValarrayVersion(BenchmarkExt& bench" - << scalarArgs << ")\n" - << - "{\n" - " bench.beginImplementation(\"valarray\");\n" - "\n" - " while (!bench.doneImplementationBenchmark())\n" - " {\n" - " int N = bench.getParameter();\n" - " cout << \"valarray: N = \" << N << endl;\n" - " cout.flush();\n" - "\n" - " long iters = bench.getIterations();\n" - "\n"; - - for (int i=0; i < lp.numArrays(); ++i) { - os << " valarray<" << numtype << "> " << lp.arrayName(i) - << "(N);" << endl - << " initializeArray(" << lp.arrayName(i) << ", N);" << endl; - } - - os << endl << - " bench.start();\n" - " for (long i=0; i < iters; ++i)\n" - " {\n" - " "; - - const char* loopBuffer = lp.loopBuffer(); - - for (int i=0; loopBuffer[i]; ++i) { 
- if (loopBuffer[i] != '$') - os << loopBuffer[i]; - } - - os << ";" << endl << - " sink();\n"; - - os << - " }\n" - " bench.stop();\n\n" - " bench.startOverhead();\n" - " for (long i=0; i < iters; ++i)\n" - " sink();\n" - " bench.stopOverhead();\n" - - " }\n" - "\n" - " bench.endImplementation();\n" - "}" << endl << endl << "#endif" << endl; -} - -int main() -{ - ifstream ifs("loops.data"); - - //ofstream ofs("makefile.inc"); - - loop lp; - - while (!ifs.eof()) { - lp.parseLoop(ifs); - - if (ifs.eof()) - break; - - /* - ofs -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o:\t" << lp.loopName() << "f90.f" - << endl << "\t$(F90) $(F90FLAGS) -c " << lp.loopName() << "f90.f" - << endl << endl -#endif - << lp.loopName() << ":\t" << lp.loopName() << ".o " - << lp.loopName() << "f.o " -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o" -#endif - << endl - << "\t$(CXX) $(CXXFLAGS) $(LDFLAGS) -o " << lp.loopName() << " " - << lp.loopName() << ".o " << lp.loopName() << "f.o " -#ifdef FORTRAN_90 - << lp.loopName() << "f90.o " -#endif - << "$(LIBS)" << endl << endl; - */ - - fortranVersion(lp); -//#ifdef FORTRAN_90 - fortran90Version(lp); -//#endif - cppVersion(lp); - } - return 0; -} - - - diff --git a/benchmarks/plot_benchmarks.m.in b/benchmarks/plot_benchmarks.m.in deleted file mode 100644 index 99e2ef76..00000000 --- a/benchmarks/plot_benchmarks.m.in +++ /dev/null @@ -1,57 +0,0 @@ -function plot_benchmarks(hostname) -% function plot_benchmarks(hostname) - -% -% $Id$ -% -% Copyright (c) 2001 Patrick Guio -% -% All Rights Reserved. -% -% This program is free software; you can redistribute it and/or modify it -% under the terms of the GNU General Public License as published by the -% Free Software Foundation; either version 2. of the License, or (at your -% option) any later version. -% -% This program is distributed in the hope that it will be useful, but -% WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General -% Public License for more details. -% - -close all - -bench={'daxpy','haney','loop1','loop2','loop3','loop5','loop6','loop8',... - 'loop9','loop10','loop11','loop12','loop13','loop14','loop15','loop16',... - 'loop17','loop18','loop19','loop21','loop22','loop23','loop24','loop25',... - 'loop36','stencil'}; - -if nargin == 0, - str=sprintf('@PACKAGE@-@VERSION@ benchmark on a %s', computer); -else - str=sprintf('@PACKAGE@-@VERSION@ benchmark on %s (%s)', hostname, computer); -end -str=sprintf('%s\nCXX=@CXX@ CXXFLAGS=@CXXFLAGS@ @CXX_OPTIMIZE_FLAGS@', str); -str=sprintf('%s\nF77=@F77@ FFLAGS=@FFLAGS@ @F77_OPTIMIZE_FLAGS@', str); -if length('@FC@') - str=sprintf('%s\nFC=@FC@ FCFLAGS=@FCFLAGS@ @FC_OPTIMIZE_FLAGS@', str); -end -str=strrep(str,'_','\_'); -h=text(0.5,0.5,str); -set(h,'HorizontalAlignment','center') -set(h,'FontSize',18) -set(h,'FontWeight','demi') -set(gca,'visible','off') -orient landscape -print -dpsc benchmarks.ps - -for i=1:length(bench), - eval(bench{i}) - hs=get(gca,'children')'; - for h=hs, set(h,'linewidth',1.5) , end - legend - orient landscape - print -dpsc -append benchmarks.ps -end - - diff --git a/benchmarks/qcd.cpp b/benchmarks/qcd.cpp deleted file mode 100644 index 040beb9b..00000000 --- a/benchmarks/qcd.cpp +++ /dev/null @@ -1,244 +0,0 @@ -#include -#include -#include -#include - -#ifdef BZ_HAVE_COMPLEX - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define qcdf qcdf_ - #define qcdf2 qcdf2_ -#elif defined( BZ_FORTRAN_SYMBOLS_CAPS) - #define qcdf QCDF - #define qcdf2 QCDF2 -#endif - -extern "C" { - void qcdf(const void* M, void* res, const void* src, const int& N, - const int& iters); - void qcdf2(const void* M, void* res, const void* src, const int& N, - const int& iters); -} - -int QCDBlitzVersion(BenchmarkExt& bench); -int QCDBlitzTunedVersion(BenchmarkExt& bench); -int QCDFortran77Version(BenchmarkExt& bench); -int QCDFortran77TunedVersion(BenchmarkExt& bench); - -void 
initializeRandomDouble(double* data, int numElements); - -int main() -{ - cout << "Blitz++ QCD Benchmark" << endl - << "Working... (this may take a while) "; - cout.flush(); - - BenchmarkExt bench("Lattice QCD Benchmark", 4); - - bench.setRateDescription("Millions of operations/s"); - bench.beginBenchmarking(); - - QCDBlitzVersion(bench); - QCDBlitzTunedVersion(bench); - QCDFortran77Version(bench); - QCDFortran77TunedVersion(bench); - - bench.endBenchmarking(); - - bench.saveMatlabGraph("qcd.m"); - - cout << "Done." << endl; - - return 0; -} - -int QCDBlitzVersion(BenchmarkExt& bench) -{ - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Blitz++"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - -cout << "length = " << length << endl; - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - for (long i=0; i < iters; ++i) - { - for (int i=0; i < length; ++i) - res[i] = product(M[i], src[i]); - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> gaugeFieldElement; - - struct latticeUnit { - spinor one; - gaugeFieldElement gauge; - spinor two; - }; - -int QCDBlitzTunedVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ (tuned)"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector lattice(length); - - initializeRandomDouble((double*)lattice.data(), - length * sizeof(latticeUnit) / sizeof(double)); - - bench.start(); - for (long i=0; 
i < iters; ++i) - { - for (int i=0; i < length; ++i) - lattice[i].two = product(lattice[i].gauge, lattice[i].one); - } - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -int QCDFortran77Version(BenchmarkExt& bench) -{ - // Use Blitz++ library only to allocate space for the - // arrays. - typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - qcdf(M.data(), res.data(), src.data(), length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -int QCDFortran77TunedVersion(BenchmarkExt& bench) -{ - // Use Blitz++ library only to allocate space for the - // arrays. 
- typedef TinyMatrix, 3, 2> spinor; - typedef TinyMatrix, 3, 3> SU3Gauge; - - bench.beginImplementation("Fortran 77 Hand-tuned"); - - while (!bench.doneImplementationBenchmark()) - { - int length = bench.getParameter(); - int iters = (int)bench.getIterations(); - - Vector res(length), src(length); - Vector M(length); - - initializeRandomDouble((double*)src.data(), - length * sizeof(spinor) / sizeof(double)); - initializeRandomDouble((double*)M.data(), - length * sizeof(SU3Gauge) / sizeof(double)); - - bench.start(); - qcdf2(M.data(), res.data(), src.data(), length, iters); - bench.stop(); - - // Time overhead - bench.startOverhead(); - for (long i=0; i < iters; ++i) - { - } - bench.stopOverhead(); - } - - bench.endImplementation(); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements) -{ - // This is a temporary kludge until I implement random complex - // numbers. - - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i] = rnd.random(); -} - -#else // BZ_HAVE_COMPLEX - -#include - -int main() -{ - cout << "This benchmark requires from the ISO/ANSI C++ standard." 
- << endl; - return 0; -} - -#endif // BZ_HAVE_COMPLEX diff --git a/benchmarks/qcd.txt b/benchmarks/qcd.txt deleted file mode 100644 index c244da47..00000000 --- a/benchmarks/qcd.txt +++ /dev/null @@ -1,19 +0,0 @@ - -The most expensive routines in terms of CPU time were a family of routines -that multiplied a 2-spinors by SU(3) gauge elements: The core of these -routines is basically the same as the following code: - -COMPLEX M(V,3,3) res(V,3,2), src(V,3,2) - DO spin=1,2 - DO col=1,3 - DO site=1,V - res(site,col,spin)= - M(site,col,1) * src(site,1,spin) - + M(site,col,2) * src(site,2,spin) - + M(site,col,3) * src(site,3,spin) - END DO - END DO - END DO - -http://www.epcc.ed.ac.uk/t3d/documents/techreports/EPCC-TR96-03/EPCC-TR96-03.book_1.html - diff --git a/benchmarks/qcdf.f b/benchmarks/qcdf.f deleted file mode 100644 index f65f0c02..00000000 --- a/benchmarks/qcdf.f +++ /dev/null @@ -1,77 +0,0 @@ -C Initial Fortran 77 version of the Lattice QCD benchmark. - - subroutine qcdf(M, res, src, V, iters) - integer V, iters, i, site, spin, col - complex*16 M(3,3,V), res(3,2,V), src(3,2,V) - - DO i=1,iters - DO site=1,V - DO spin=1,2 - DO col=1,3 - res(col,spin,site) = M(col,1,site) * src(1,spin,site) - . + M(col,2,site) * src(2,spin,site) - . + M(col,3,site) * src(3,spin,site) - ENDDO - ENDDO - ENDDO - ENDDO - - return - end - -C Hand-tuned version -C Changes: -C o Ordering of array altered to improve layout of data in memory -C o col and spin loops unwound; it was found that unwinding the -C col loop inside the spin loop was marginally faster (by 1.1%) -C o Unwinding both loops was faster than unwinding just one. - - subroutine qcdf2(M, res, src, V, iters) - integer V, iters, i, site - complex*16 M(3,3,V), res(3,2,V), src(3,2,V) - - DO i=1,iters - DO site=1,V - -C col=1, spin=1 - - res(1,1,site) = M(1,1,site) * src(1,1,site) - . + M(1,2,site) * src(2,1,site) - . + M(1,3,site) * src(3,1,site) - -C col=2, spin=1 - - res(2,1,site) = M(2,1,site) * src(1,1,site) - . 
+ M(2,2,site) * src(2,1,site) - . + M(2,3,site) * src(3,1,site) - -C col=3, spin=1 - - res(3,1,site) = M(3,1,site) * src(1,1,site) - . + M(3,2,site) * src(2,1,site) - . + M(3,3,site) * src(3,1,site) - -C col=1, spin=2 - - res(1,2,site) = M(1,1,site) * src(1,2,site) - . + M(1,2,site) * src(2,2,site) - . + M(1,3,site) * src(3,2,site) - -C col=2, spin=2 - - res(2,2,site) = M(2,1,site) * src(1,2,site) - . + M(2,2,site) * src(2,2,site) - . + M(2,3,site) * src(3,2,site) - -C col=3, spin=2 - - res(3,2,site) = M(3,1,site) * src(1,2,site) - . + M(3,2,site) * src(2,2,site) - . + M(3,3,site) * src(3,2,site) - - ENDDO - ENDDO - - return - end - diff --git a/benchmarks/quinlan.cpp b/benchmarks/quinlan.cpp deleted file mode 100644 index 3d246a63..00000000 --- a/benchmarks/quinlan.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include - -using namespace blitz; - -template -void sink(T&) { } - -void foo() -{ - Vector A(100), B(100); - - A = B + B; - sink(A); - sink(B); -} - - diff --git a/benchmarks/stencil.cpp b/benchmarks/stencil.cpp deleted file mode 100644 index b923373f..00000000 --- a/benchmarks/stencil.cpp +++ /dev/null @@ -1,453 +0,0 @@ -// Array stencil benchmark - -#include -#include -#include -#include -#include - -using namespace blitz; - -#if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES) - #define stencilf stencilf_ - #define stencilftiled stencilftiled_ - #define stencilf90 stencilf90_ -#elif defined(BZ_FORTRAN_SYMBOLS_CAPS) - #define stencilf STENCILF - #define stencilftiled STENCILFTILED - #define stencilf90 STENCILF90 -#endif - -extern "C" { - void stencilf(double* A, double* B, int& N, int& iters); - void stencilftiled(double* A, double* B, int& N, int& iters); - void stencilf90(double* A, double* B, int& N, int& iters); -} - -#ifdef FORTRAN_90 -void stencilFortran90Version(BenchmarkExt& bench); -#endif -void stencilFortran77Version(BenchmarkExt& bench); -void stencilFortran77VersionTiled(BenchmarkExt& bench); -void stencilBlitzVersion(BenchmarkExt& bench); -void 
stencilBlitzExpressionVersion(BenchmarkExt& bench); -void stencilBlitzProductVersion(BenchmarkExt& bench); -void stencilBlitzProductVersion2(BenchmarkExt& bench); -void stencilBlitzProductVersion3(BenchmarkExt& bench); -void stencilBlitzStencilVersion(BenchmarkExt& bench); -void stencilBlitzIndexVersion(BenchmarkExt& bench); - -int main() -{ - int numBenchmarks = 10; -#ifndef FORTRAN_90 - numBenchmarks--; // No fortran 90 -#endif - - BenchmarkExt bench("Array stencil", numBenchmarks); - - const int numSizes = 28; - - bench.setNumParameters(numSizes); - bench.setRateDescription("Mflops/s"); - - Vector parameters(numSizes); - Vector iters(numSizes); - Vector flops(numSizes); - - for (int i=0; i < numSizes; ++i) - { - parameters[i] = (i+1) * 8; - iters[i] = 32*8*8*8/(i+1)/(i+1)/(i+1)/4; - if (iters[i] < 2) - iters[i] = 2; - int npoints = parameters[i] - 2; - flops[i] = npoints * npoints * npoints * 7 * 2; - } - - bench.setParameterVector(parameters); - bench.setIterations(iters); - bench.setOpsPerIteration(flops); - - bench.beginBenchmarking(); -#ifdef FORTRAN_90 - stencilFortran90Version(bench); -#endif - stencilBlitzVersion(bench); - stencilBlitzStencilVersion(bench); - stencilBlitzExpressionVersion(bench); - stencilBlitzProductVersion(bench); - stencilBlitzProductVersion2(bench); - stencilBlitzProductVersion3(bench); - stencilBlitzIndexVersion(bench); - stencilFortran77Version(bench); - stencilFortran77VersionTiled(bench); - bench.endBenchmarking(); - - bench.saveMatlabGraph("stencil.m","plot"); - - return 0; -} - -void initializeRandomDouble(double* data, int numElements, int stride = 1) -{ - static Random rnd; - - for (int i=0; i < numElements; ++i) - data[i*stride] = rnd.random(); -} - -void stencilBlitzVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Range Expr"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++: N = " << N << endl; - cout.flush(); - - long iters = 
bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - - A(I,J,K) = c * (B(I,J,K) + B(I+1,J,K) + B(I-1,J,K) + B(I,J+1,K) - + B(I,J-1,K) + B(I,J,K+1) + B(I,J,K-1)); - - B(I,J,K) = c * (A(I,J,K) + A(I+1,J,K) + A(I-1,J,K) + A(I,J+1,K) - + A(I,J-1,K) + A(I,J,K+1) + A(I,J,K-1)); - } - bench.stop(); - } - - bench.endImplementation(); -} - -BZ_DECLARE_STENCIL_OPERATOR1(test1,B) -return (1./7) * ( (*B) + B.shift(1,0) + B.shift(-1,0) + B.shift(1,1) - + B.shift(-1,1) + B.shift(1,2) + B.shift(-1,2)); -BZ_END_STENCIL_OPERATOR - -BZ_ET_STENCIL(test1, double, double,shape(-1,-1,-1),shape(1,1,1)) - -BZ_DECLARE_STENCIL2(test1stencil,A,B) - A=test1(B); -BZ_END_STENCIL_WITH_SHAPE(shape(-1,-1,-1),shape(1,1,1)) - -void stencilBlitzExpressionVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ StencilOp"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - A(I,J,K) = test1(B); - - B(I,J,K) = test1(A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product: N = " << 
N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N),C(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - C=B*B; - A(I,J,K) = test1(C); - C=A*A; - B(I,J,K) = test1(C); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion2(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product w alloc"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - { - ArrayC(B*B); - A(I,J,K) = test1(C); - } - { - ArrayC(A*A); - B(I,J,K) = test1(C); - } - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzProductVersion3(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ product expr"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil Operator on product expr: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - 
A(I,J,K) = test1(B*B); - - B(I,J,K) = test1(A*A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzIndexVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Indexed StencilOp"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Indexed Stencil Operator: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - A(I,J,K) = test1(B(tensor::i, tensor::j, tensor::k)); - - B(I,J,K) = test1(A(tensor::i, tensor::j, tensor::k)); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilBlitzStencilVersion(BenchmarkExt& bench) -{ - bench.beginImplementation("Blitz++ Stencil"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Blitz++ Stencil: N = " << N << endl; - cout.flush(); - - long iters = bench.getIterations(); - - Array A(N,N,N), B(N,N,N); - initializeRandomDouble(A.data(), N*N*N, A.stride(thirdDim)); - initializeRandomDouble(B.data(), N*N*N, B.stride(thirdDim)); - TinyVector size = N-2; - generateFastTraversalOrder(size); - double c = 1/7.; - - ; bench.start(); - for (long i=0; i < iters; ++i) - { - Range I(1,N-2), J(1,N-2), K(1,N-2); - applyStencil(test1stencil(),A,B); - applyStencil(test1stencil(),B,A); - } - bench.stop(); - } - - bench.endImplementation(); -} - -void stencilFortran77Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t 
arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilf(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} - -void stencilFortran77VersionTiled(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 77 (tiled)"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 77: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilftiled(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} - -#ifdef FORTRAN_90 -void stencilFortran90Version(BenchmarkExt& bench) -{ - bench.beginImplementation("Fortran 90"); - - while (!bench.doneImplementationBenchmark()) - { - int N = bench.getParameter(); - - cout << "Fortran 90: N = " << N << endl; - cout.flush(); - - int iters = (int)bench.getIterations(); - - size_t arraySize = size_t(N) * size_t(N) * N; - - double* A = new double[arraySize]; - double* B = new double[arraySize]; - - initializeRandomDouble(A, arraySize); - initializeRandomDouble(B, arraySize); - - bench.start(); - stencilf90(A, B, N, iters); - bench.stop(); - - delete [] A; - delete [] B; - } - - bench.endImplementation(); -} -#endif diff --git a/benchmarks/stencilf.f b/benchmarks/stencilf.f deleted file mode 100644 index 44f4ea04..00000000 --- a/benchmarks/stencilf.f +++ /dev/null @@ -1,32 +0,0 @@ - subroutine stencilf(A, B, N, iters) - integer N, iters - double precision A(N,N,N), B(N,N,N) - integer i,j,k,z - double precision c - c = 1 / 7. 
- - do z=1,iters - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - A(i,j,k) = c * (B(i,j,k) + B(i+1,j,k) + B(i-1,j,k) - . + B(i,j+1,k) + B(i,j-1,k) + B(i,j,k+1) + B(i,j,k-1)) - enddo - enddo - enddo - - do k=2,N-1 - do j=2,N-1 - do i=2,N-1 - B(i,j,k) = c * (A(i,j,k) + A(i+1,j,k) + A(i-1,j,k) - . + A(i,j+1,k) + A(i,j-1,k) + A(i,j,k+1) + A(i,j,k-1)) - enddo - enddo - enddo - - enddo - - return - end - diff --git a/benchmarks/stencilf2.f b/benchmarks/stencilf2.f deleted file mode 100644 index 400dadb1..00000000 --- a/benchmarks/stencilf2.f +++ /dev/null @@ -1,48 +0,0 @@ -! Tiled fortran stencil - - SUBROUTINE stencilftiled(A, B, N, iters) - INTEGER N, iters - DOUBLE PRECISION A(N,N,N), B(N,N,N) - INTEGER z - DOUBLE PRECISION c - c = 1 / 7. - - DO z=1,iters - CALL tiledStencil(A, B, N, c) - CALL tiledStencil(B, A, N, c) - ENDDO - - RETURN - END - - SUBROUTINE tiledStencil(A, B, N, c) - INTEGER N - DOUBLE PRECISION A(N,N,N), B(N,N,N) - DOUBLE PRECISION c - INTEGER i,j,k,bi,bj,bk,ni,nj,nk - INTEGER blockSize - - blockSize = 16 - - DO bi=2,N-1,blockSize - DO bj=2,N-1,blockSize - DO bk=2,N-1,blockSize - ni = min(bi+blockSize-1,N-1) - nj = min(bj+blockSize-1,N-1) - nk = min(bk+blockSize-1,N-1) - - DO k=bk,nk - DO j=bj,nj - DO i=bi,ni - A(i,j,k) = c * (B(i,j,k) + B(i+1,j,k) + B(i-1,j,k) - . + B(i,j+1,k) + B(i,j-1,k) + B(i,j,k+1) + B(i,j,k-1)) - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - ENDDO - - RETURN - END - diff --git a/benchmarks/stencilf90.f90 b/benchmarks/stencilf90.f90 deleted file mode 100644 index 08d787b4..00000000 --- a/benchmarks/stencilf90.f90 +++ /dev/null @@ -1,19 +0,0 @@ -SUBROUTINE stencilf90(A, B, n, iters) - IMPLICIT NONE - INTEGER, INTENT( IN ) :: n, iters - DOUBLE PRECISION, DIMENSION (n,n,n) :: A, B - DOUBLE PRECISION :: c - INTEGER :: count - - c = 1 / 7. 
- - DO count=1,iters - A(2:N-1,2:N-1,2:N-1) = c * (B(2:N-1,2:N-1,2:N-1) + B(3:N,2:N-1,2:N-1) & - + B(1:N-2,2:N-1,2:N-1) + B(2:N-1,3:N,2:N-1) + B(2:N-1,1:N-2,2:N-1) & - + B(2:N-1,2:N-1,3:N) + B(2:N-1,2:N-1,1:N-2)) - - B(2:N-1,2:N-1,2:N-1) = c * (A(2:N-1,2:N-1,2:N-1) + A(3:N,2:N-1,2:N-1) & - + A(1:N-2,2:N-1,2:N-1) + A(2:N-1,3:N,2:N-1) + A(2:N-1,1:N-2,2:N-1) & - + A(2:N-1,2:N-1,3:N) + A(2:N-1,2:N-1,1:N-2)) - END DO -END SUBROUTINE diff --git a/benchmarks/stenciln.cpp b/benchmarks/stenciln.cpp deleted file mode 100644 index 0a9ad655..00000000 --- a/benchmarks/stenciln.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include - -using namespace blitz; - -BZ_DECLARE_STENCIL4(acoustic2D_stencil,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian2D(P2) - P1; -BZ_END_STENCIL - -int benchmark(int N, int nIterations, int blockSize) -{ - Array P1, P2, P3, c; - allocateArrays(shape(N,N), P1, P2, P3, c); - - // Initial conditions: obviously in a real application these - // wouldn't be zeroed... - Range I(0,blockSize-1), J(0,blockSize-1); - - P1(I,J) = 0; - P2(I,J) = 0; - P3(I,J) = 0; - c(I,J) = 0; - - for (int i=0; i < nIterations; ++i) - { - // Apply the stencil object to the arrays - applyStencil(acoustic2D_stencil(), P1(I,J), P2(I,J), P3(I,J), c(I,J)); - - // Set [P1,P2,P3] <- [P2,P3,P1] to set up for the next - // time step - cycleArrays(P1,P2,P3); - } - - return 0; -} - -int main() -{ - Timer timer; - - cout << "N\tMflops" << endl; - - const int blockSize = 27; - - for (int N=2000; N < 2100; ++N) - { - double stencilPoints = pow(blockSize-2,2.0); - int nIterations = 5000; - - timer.start(); - benchmark(N, nIterations, blockSize); - timer.stop(); - - double flops = (4 + 7) * stencilPoints * nIterations; - double Mflops = flops / timer.elapsedSeconds() / 1.0E+6; - cout << N << "\t" << Mflops << endl; - } - - return 0; -} - diff --git a/benchmarks/tiny3.cpp b/benchmarks/tiny3.cpp deleted file mode 100644 index d87c35de..00000000 --- a/benchmarks/tiny3.cpp +++ /dev/null @@ -1,128 +0,0 @@ 
-#include -#include -#include - -using namespace blitz; - -template -void optimizationSink(T&); - -int main() -{ - TinyMatrix A1, A2; - TinyVector b1, b2, c1, c2; - Timer timer; - - const int iters = 9000000; - - for (int i=0; i < 3; ++i) - { - for (int j=0; j < 3; ++j) - A1(i,j) = 1.0; - b1(i) = 1.0; - b2(i) = 1.0; - } - - optimizationSink(A1); - optimizationSink(A2); - optimizationSink(b1); - optimizationSink(b2); - - timer.start(); - for (int i=0; i < iters; ++i) - { - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - c1 = product(A1,b1); - c2 = product(A1,b2); - b1 = product(A1,c1); - c2 = product(A1,c2); - } - - timer.stop(); - - double ops = 64.0 * iters; 
- double flops = ops * 15; - double seconds = timer.elapsedSeconds(); - - double timePerOp = seconds / ops; - double cycles = timePerOp * (100.0 * 1e+6); - - cout << "ops = " << ops << endl - << "seconds = " << seconds << endl; - - cout << "timePerOp = " << timePerOp << endl - << "cycles = " << cycles << endl; - - double Mflops = flops / seconds / 1.0e+6; - cout << "Mflops = " << Mflops << endl; - - optimizationSink(c1); - optimizationSink(c2); - - return 0; -} - -template -void optimizationSink(T&) -{ -} - diff --git a/benchmarks/tinydaxpy.cpp b/benchmarks/tinydaxpy.cpp deleted file mode 100644 index 40499c39..00000000 --- a/benchmarks/tinydaxpy.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// TinyVector DAXPY benchmark - -//#define BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE - -#include -#include -#include - -using namespace blitz; - -ranlib::Uniform rnd; - -template -void optimizationSink(T&); - -template -void tinyDAXPYBenchmark(TinyVector, int iters, double a) -{ - Timer timer; - - TinyVector ta, tb, tc, td, te, tf, tg, th, ti, tj; - for (int i=0; i < N_rank; ++i) - { - ta[i] = rnd.random()+1; - tb[i] = rnd.random()+1; - tc[i] = rnd.random()+1; - td[i] = rnd.random()+1; - te[i] = rnd.random()+1; - tf[i] = rnd.random()+1; - tg[i] = rnd.random()+1; - th[i] = rnd.random()+1; - ti[i] = rnd.random()+1; - tj[i] = rnd.random()+1; - } - - double b = -a; - - double numFlops = 0; - - if (N_rank < 20) - { - timer.start(); - for (int i=0; i < iters; ++i) - { - ta += a * tb; - tc += a * td; - te += a * tf; - tg += a * th; - ti += a * tj; - tb += b * ta; - td += b * tc; - tf += b * te; - th += b * tg; - tj += b * ti; - ta += a * tb; - tc += a * td; - te += a * tf; - tg += a * th; - ti += a * tj; - tb += b * ta; - td += b * tc; - tf += b * te; - th += b * tg; - tj += b * ti; - } - timer.stop(); - numFlops = 40.0 * N_rank * double(iters); - } - else { - timer.start(); - for (int i=0; i < iters; ++i) - { - ta += a * tb; - tb += b * ta; - } - timer.stop(); - numFlops = 4.0 * N_rank * 
double(iters); - } - - optimizationSink(ta); - optimizationSink(tb); - optimizationSink(tc); - optimizationSink(td); - optimizationSink(te); - optimizationSink(tf); - optimizationSink(tg); - optimizationSink(th); - optimizationSink(ti); - optimizationSink(tj); - - timer.stop(); - float Gflops = numFlops / (1e9*timer.elapsed()); - - if (iters > 1) - { - cout << setw(5) << N_rank << '\t' << Gflops << endl; - } -} - -double a = 0.3429843; - -template -void optimizationSink(T&) -{ -} - -int main() -{ - cout << "TinyVector DAXPY benchmark" << endl - << setw(5) << "N" << '\t' << "Gflops/" << Timer::indep_var() << endl; - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 800000, a); - tinyDAXPYBenchmark(TinyVector(), 700000, a); - tinyDAXPYBenchmark(TinyVector(), 600000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - tinyDAXPYBenchmark(TinyVector(), 500000, a); - - return 0; -} - diff --git a/blitz/CMakeLists.txt b/blitz/CMakeLists.txt index a3839b16..febbc930 100644 --- a/blitz/CMakeLists.txt +++ b/blitz/CMakeLists.txt @@ -1,4 +1,8 @@ -include(CheckCXXFeatures) +add_library(BlitzHeaders INTERFACE) +add_library(Blitz::BlitzHeaders ALIAS BlitzHeaders) + +target_include_directories(BlitzHeaders INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/..) 
+ option(BZ_FULLY64BIT "Enable 64 dimensions with > 2^31 elements (NOT IMPLEMENTED)" OFF) option(BZ_THREADSAFE "Enable Blitz thread-safety features" OFF) @@ -24,89 +28,34 @@ else() set(BZ_SIMD_WIDTH 1) endif() -set(EXTRA_LIBS) -option(ENABLE_SERIALISATION "Enable serialization support using Boost::Serialization" OFF) -if (ENABLE_SERIALISATION) - find_package(Boost COMPONENTS serialization) - if (Boost_FOUND) - set(BZ_HAVE_BOOST TRUE) - include_directories(${Boost_INCLUDE_DIRS}) - if (Boost_SERIALIZATION_FOUND) - set(DEP_PKGS ${DEP_PKGS} Boost) - set(BOOST_command "Boost COMPONENTS serialization" PARENT_SCOPE) - set(BZ_HAVE_BOOST_SERIALIZATION TRUE) - set(EXTRA_LIBS ${EXTRA_LIBS} ${BOOST_LIBRARIES}) - else() - message(WARNING "Boost serialization library not found !") - endif() - endif() -endif() - -find_library(BZ_HAVE_LIBPAPI papi) -if (BZ_HAVE_LIBPAPI) - set(DEP_LIBS ${DEP_LIBS} papi) - set(EXTRA_LIBS ${EXTRA_LIBS} ${BZ_HAVE_LIBPAPI}) - set(PKGCONFIG_LIBS ${PKGCONFIG_LIBS} -lpapi) -endif() +find_package(Boost 1.71 COMPONENTS serialization) -find_library(BZ_HAVE_LIBM m) -if (BZ_HAVE_LIBM) - set(DEP_LIBS ${DEP_LIBS} m) - set(EXTRA_LIBS ${EXTRA_LIBS} ${BZ_HAVE_LIBM}) - set(PKGCONFIG_LIBS ${PKGCONFIG_LIBS} -lm) +if (TARGET Boost::serialization) + target_compile_definitions(BlitzHeaders INTERFACE BZ_HAVE_BOOST_SERIALIZATION) + target_link_libraries(BlitzHeaders INTERFACE Boost::serialization) endif() -set(BLITZ_EXTRA_LIBRARIES ${EXTRA_LIBS} PARENT_SCOPE) -set(DEPS ${DEP_PKGS} LIBRARIES ${DEP_LIBS} PARENT_SCOPE) -if (NOT CHECKED_COMPILER_CXX_FEATURES) - CHECK_ALL_CXX_FEATURES(BZ_) - CHECK_HEADER(HAVE_INTTYPES_H inttypes.h) - set(BZ_MATH_ABSINT_IN_NAMESPACE_STD ${BZ_HAVE_MATH_ABSINT_IN_NAMESPACE_STD}) - set(BZ_MATH_FN_IN_NAMESPACE_STD ${BZ_HAVE_MATH_FN_IN_NAMESPACE_STD}) - set(BZ_ISNAN_IN_NAMESPACE_STD ${BZ_HAVE_ISNAN_IN_NAMESPACE_STD}) - set(BZ_HAVE_NCEG_RESTRICT_EGCS ${BZ_HAVE_RESTRICT_EGCS}) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake.h.in 
${BLITZ_CONFIG_FILE}) - set(CHECKED_COMPILER_CXX_FEATURES TRUE CACHE INTERNAL "Gating variable to avoid checking compiler features multiple times") +string(TOLOWER ${CMAKE_BUILD_TYPE} cmake_build_type_tolower) +if (cmake_build_type_tolower STREQUAL "debug") + message(STATUS "Blitz debugging flag on") + target_compile_definitions(BlitzHeaders INTERFACE BZ_DEBUG) endif() -macro(GENERATE_BLITZ_HEADER header prog var) - add_custom_target(${header} ALL) - add_dependencies(${header} ${prog}) - add_dependencies(generated-headers ${header}) - add_custom_command( - COMMAND $ - TARGET ${header} - WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/blitz/generate - OUTPUTS ${header} - ) - set(${var} ${${var}} ${CMAKE_CURRENT_BINARY_DIR}/${header}) -endmacro() +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in config.h) add_subdirectory(generate) add_subdirectory(meta) add_subdirectory(array) -set(GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(vecbops.cc genvecbops GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(vecuops.cc genvecuops GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(vecwhere.cc genvecwhere GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(vecbfn.cc genvecbfn GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(matbops.h genmatbops GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(matuops.h genmatuops GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(mathfunc.h genmathfunc GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(promote-old.h genpromote GENERATED_HEADERS) - -set(BLITZ_HEADERS - array-impl.h array.cc array.h bench.cc bench.h benchext.cc benchext.h blitz.h bounds.h bzdebug.h bzconfig.h - compiler.h constpointerstack.h etbase.h et-forward.h funcs.h globeval.cc indexexpr.h indexmap-forward.h - levicivita.h limits-hack.h listinit.h memblock.cc memblock.h minmax.h numinquire.h numtrait.h ops.h - prettyprint.h promote.h range.h range.cc ranks.h reduce.h shapecheck.h simdtypes.h tau.h timer.h tinymat2.h - tinymat2.cc tinymat2io.cc tinyvec2.cc tinyvec2.h tinyvec2io.cc tm2fastiter.h tmevaluate.h tv2fastiter.h - tvevaluate.h 
traversal.cc traversal.h tuning.h tvcross.h tvecglobs.h update.h wrap-climits.h) - -foreach (i ${BLITZ_HEADERS}) - set(HEADERS ${HEADERS} ${CMAKE_CURRENT_SOURCE_DIR}/${i}) -endforeach() - -install(FILES ${HEADERS} ${GENERATED_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/blitz) -install(FILES ${BLITZ_CONFIG_FILE} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${BLITZ_CONFIG_INSTALL_DIR}) +target_link_libraries(BlitzHeaders INTERFACE Blitz::Array Blitz::Meta Boost::serialization) + +#install(FILES + #${CMAKE_CURRENT_BINARY_DIR}/config.h + #array-impl.h array.cc array.h bench.cc bench.h benchext.cc benchext.h blitz.h bounds.h bzdebug.h + #compiler.h constpointerstack.h etbase.h et-forward.h funcs.h globeval.cc indexexpr.h indexmap-forward.h + #levicivita.h limits-hack.h listinit.h memblock.cc memblock.h minmax.h numinquire.h numtrait.h ops.h + #prettyprint.h promote.h range.h range.cc ranks.h reduce.h shapecheck.h simdtypes.h tau.h timer.h tinymat2.h + #tinymat2.cc tinymat2io.cc tinyvec2.cc tinyvec2.h tinyvec2io.cc tm2fastiter.h tmevaluate.h tv2fastiter.h + #tvevaluate.h traversal.cc traversal.h tuning.h tvcross.h tvecglobs.h update.h wrap-climits.h + #${GENERATED_HEADERS} + #DESTINATION include/blitz) diff --git a/blitz/array-impl.h b/blitz/array-impl.h index 185914cd..1105dcf5 100644 --- a/blitz/array-impl.h +++ b/blitz/array-impl.h @@ -40,6 +40,8 @@ * - apply */ +#if !BOOST_PP_IS_ITERATING + #ifndef BZ_ARRAY_H #define BZ_ARRAY_H @@ -63,6 +65,12 @@ #endif +#include +#include +#include +#include + + namespace blitz { /* @@ -866,12 +874,11 @@ class Array : public MemoryBlockReference * a combination of integer and Range arguments. It's not intended * for end-user use. 
*/ - template - Array(Array& array, R0 r0, R1 r1, R2 r2, - R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, R10 r10) + + template + Array(Array& array, BOOST_PP_ENUM_BINARY_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R,r)) { - constructSlice(array, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10); + constructSlice(array, BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,r)); } ////////////////////////////////////////////// @@ -1026,12 +1033,12 @@ class Array : public MemoryBlockReference const TinyVector& ordering() const { return storage_.ordering(); } - void transposeSelf(int r0, int r1, int r2=0, - int r3=0, int r4=0, int r5=0, int r6=0, int r7=0, int r8=0, int - r9=0, int r10=0); - T_array transpose(int r0, int r1, int r2=0, - int r3=0, int r4=0, int r5=0, int r6=0, int r7=0, int r8=0, int - r9=0, int r10=0) const; +#define ARGUMENTS_here BOOST_PP_ENUM_PARAMS(BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,2), int=0 BOOST_PP_INTERCEPT ) + + void transposeSelf(int r0, int r1, ARGUMENTS_here); + T_array transpose (int r0, int r1, ARGUMENTS_here) const; + +#undef ARGUMENTS_here static int rank() { return rank_; } @@ -1981,101 +1988,15 @@ class Array : public MemoryBlockReference #ifdef BZ_HAVE_PARTIAL_ORDERING - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection()); - } +#define DEFAULT_print(z, n, data) nilArraySection() +#define BOOST_PP_ITERATION_LIMITS (2,BLITZ_ARRAY_LARGEST_RANK) +#define BOOST_PP_FILENAME_1 "blitz/array-impl.h" - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - 
nilArraySection(), nilArraySection()); - } +#include BOOST_PP_ITERATE() - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, nilArraySection(), - nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, nilArraySection(), nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, nilArraySection(), nilArraySection(), - nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, - nilArraySection(), nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, nilArraySection(), nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, 
T10 r10) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, nilArraySection()); - } - - template - typename SliceInfo::T_slice - operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const - { - typedef typename SliceInfo::T_slice slice; - return slice(noConst(), r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11); - } +#undef BOOST_PP_FILENAME_1 +#undef BOOST_PP_ITERATION_LIMITS +#undef DEFAULT_print #endif // BZ_HAVE_PARTIAL_ORDERING @@ -2442,10 +2363,9 @@ class Array : public MemoryBlockReference void calculateZeroOffset(); - template - void constructSlice(Array& array, R0 r0, R1 r1, R2 r2, - R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, R10 r10); + + template + void constructSlice(Array& array, BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R)); template void slice(int& setRank, Range r, Array& array, @@ -2561,3 +2481,23 @@ void find(Array,1>& indices, #endif // BZ_ARRAY_H + + +#else // BOOST_PP_IS_ITERATING + + +#define N BOOST_PP_ITERATION() + +template +typename SliceInfo::T_slice +operator()(BOOST_PP_ENUM_BINARY_PARAMS(N,T,r)) const +{ + typedef typename SliceInfo::T_slice slice; + return slice(noConst(), BOOST_PP_ENUM_PARAMS(N,r) BOOST_PP_ENUM_TRAILING(BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,N),DEFAULT_print,~) ); +} + + +#undef N + + +#endif // BOOST_PP_IS_ITERATING diff --git a/blitz/array/CMakeLists.txt b/blitz/array/CMakeLists.txt index 0fbc7c48..e7dcfa3a 100644 --- a/blitz/array/CMakeLists.txt +++ b/blitz/array/CMakeLists.txt @@ -1,23 +1,12 @@ -set(BLITZ_ARRAY_HEADERS - asexpr.h asexpr.cc cartesian.h cgsolve.h complex.cc convolve.cc convolve.h cycle.cc domain.h et.h expr.h expr.cc - fastiter.h funcs.h functorExpr.h geometry.h indirect.h interlace.cc io.cc iter.h map.h methods.cc misc.cc multi.h - newet-macros.h newet.h ops.cc ops.h reduce.cc reduce.h resize.cc shape.h slice.h slicing.cc stencil-et.h - stencil-et-macros.h stencilops.h stencils.cc 
stencils.h storage.h where.h zip.h) - -foreach(i ${BLITZ_ARRAY_HEADERS}) - set(HEADERS ${HEADERS} ${CMAKE_CURRENT_SOURCE_DIR}/${i}) -endforeach() - -GENERATE_BLITZ_HEADER(bops.cc genarrbops BLITZ_ARRAY_GENERATED_HEADERS) -GENERATE_BLITZ_HEADER(uops.cc genarruops BLITZ_ARRAY_GENERATED_HEADERS) - -find_package (Python COMPONENTS Interpreter) - -add_custom_target(stencil-classes.cc ALL - COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../generate/genstencils.py stencil-classes.cc - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/../generate/genstencils.py) -add_dependencies(generated-headers stencil-classes.cc) - -set(BLITZ_ARRAY_GENERATED_HEADERS ${BLITZ_ARRAY_GENERATED_HEADERS} ${CMAKE_CURRENT_BINARY_DIR}/stencil-classes.cc) - -install(FILES ${HEADERS} ${BLITZ_ARRAY_GENERATED_HEADERS} DESTINATION include/blitz/array) +add_library(Array INTERFACE) +add_library(Blitz::Array ALIAS Array) + +target_include_directories(Array INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/../..") + +#install(FILES + #asexpr.h asexpr.cc cartesian.h cgsolve.h complex.cc convolve.cc convolve.h cycle.cc domain.h et.h expr.h expr.cc + #fastiter.h funcs.h functorExpr.h geometry.h indirect.h interlace.cc io.cc iter.h map.h methods.cc misc.cc multi.h + #newet-macros.h newet.h ops.cc ops.h reduce.cc reduce.h resize.cc shape.h slice.h slicing.cc stencil-et.h + #stencil-et-macros.h stencilops.h stencils.cc stencils.h storage.h where.h zip.h + #${BLITZ_ARRAY_GENERATED_HEADERS} + #DESTINATION include/blitz/array) diff --git a/blitz/array/asexpr.cc b/blitz/array/asexpr.cc index 7dd2d1da..90411366 100644 --- a/blitz/array/asexpr.cc +++ b/blitz/array/asexpr.cc @@ -43,36 +43,36 @@ namespace blitz { // default to scalar template -_bz_typename asExpr::T_expr asExpr::getExpr(const T& x) +typename asExpr::T_expr asExpr::getExpr(const T& x) { return T_expr(x); } // expression template term just returns itself template -const _bz_typename asExpr<_bz_ArrayExpr >::T_expr& +const typename asExpr<_bz_ArrayExpr 
>::T_expr& asExpr<_bz_ArrayExpr >::getExpr(const T_expr& x) { return x; } // array operand returns iterator template -_bz_typename asExpr >::T_expr +typename asExpr >::T_expr asExpr >::getExpr(const Array& x) { return x.beginFast(); } // tinyvector operand returns iterator template -_bz_typename asExpr >::T_expr +typename asExpr >::T_expr asExpr >::getExpr(const TinyVector& x) { return x.beginFast(); } // tinymatrix operands returns iterator template -_bz_typename asExpr >::T_expr +typename asExpr >::T_expr asExpr >::getExpr(const TinyMatrix& x) { return x.beginFast(); } // Index placeholder returns itself template -_bz_typename asExpr >::T_expr +typename asExpr >::T_expr asExpr >::getExpr(const T_expr& x) { return x; } diff --git a/blitz/array/cartesian.h b/blitz/array/cartesian.h index 51f98039..bd61a305 100644 --- a/blitz/array/cartesian.h +++ b/blitz/array/cartesian.h @@ -244,7 +244,7 @@ void CartesianProduct::debugDump() for (int i=0; i < N_containers; ++i) { cout << "Container " << (i+1) << ": "; - _bz_typename T_container::const_iterator iter = containers_[i]->begin(), + typename T_container::const_iterator iter = containers_[i]->begin(), end = containers_[i]->end(); for (; iter != end; ++iter) cout << (*iter) << '\t'; @@ -254,7 +254,7 @@ void CartesianProduct::debugDump() template class CartesianProductIterator { public: - typedef _bz_typename T_container::const_iterator citerator; + typedef typename T_container::const_iterator citerator; typedef CartesianProductIterator iterator; typedef CartesianProduct T_cp; diff --git a/blitz/array/expr.h b/blitz/array/expr.h index 11e7c04a..ab028b91 100644 --- a/blitz/array/expr.h +++ b/blitz/array/expr.h @@ -109,7 +109,7 @@ class _bz_ArrayExpr public: typedef P_expr T_expr; - typedef _bz_typename T_expr::T_numtype T_numtype; + typedef typename T_expr::T_numtype T_numtype; // select return type typedef typename unwrapET::T_unwrapped test; typedef typename selectET T_range_result; + typedef _bz_ArrayExpr T_range_result; 
static const int numArrayOperands = T_expr::numArrayOperands, @@ -156,7 +156,7 @@ class _bz_ArrayExpr : iter_(a) { } #if !defined(__MWERKS__) - _bz_ArrayExpr(BZ_ETPARM(_bz_typename T_expr::T_ctorArg1) a) + _bz_ArrayExpr(BZ_ETPARM(typename T_expr::T_ctorArg1) a) : iter_(a) { } #endif @@ -502,8 +502,8 @@ class _bz_ArrayExprUnaryOp { public: typedef P_expr T_expr; typedef P_op T_op; - typedef _bz_typename T_expr::T_numtype T_numtype1; - typedef _bz_typename T_op::T_numtype T_numtype; + typedef typename T_expr::T_numtype T_numtype1; + typedef typename T_op::T_numtype T_numtype; // select return type typedef typename unwrapET::T_unwrapped test; @@ -516,7 +516,7 @@ class _bz_ArrayExprUnaryOp { typedef T_expr T_ctorArg1; typedef int T_ctorArg2; // dummy - typedef _bz_ArrayExprUnaryOp<_bz_typename P_expr::T_range_result, + typedef _bz_ArrayExprUnaryOp T_range_result; static const int @@ -543,7 +543,7 @@ class _bz_ArrayExprUnaryOp { { } /* - _bz_ArrayExprUnaryOp(_bz_typename T_expr::T_ctorArg1 a) + _bz_ArrayExprUnaryOp(typename T_expr::T_ctorArg1 a) : iter_(a) { } */ @@ -778,9 +778,9 @@ class _bz_ArrayExprBinaryOp { typedef P_expr1 T_expr1; typedef P_expr2 T_expr2; typedef P_op T_op; - typedef _bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; - typedef _bz_typename T_op::T_numtype T_numtype; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; + typedef typename T_op::T_numtype T_numtype; // select return type typedef typename unwrapET::T_unwrapped T_unwrapped1; @@ -796,8 +796,8 @@ class _bz_ArrayExprBinaryOp { typedef T_expr1 T_ctorArg1; typedef T_expr2 T_ctorArg2; - typedef _bz_ArrayExprBinaryOp<_bz_typename P_expr1::T_range_result, - _bz_typename P_expr2::T_range_result, + typedef _bz_ArrayExprBinaryOp T_range_result; static const int @@ -1101,10 +1101,10 @@ class _bz_ArrayExprTernaryOp { typedef P_expr2 T_expr2; typedef P_expr3 T_expr3; typedef P_op T_op; - typedef 
_bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; - typedef _bz_typename T_expr3::T_numtype T_numtype3; - typedef _bz_typename T_op::T_numtype T_numtype; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr3::T_numtype T_numtype3; + typedef typename T_op::T_numtype T_numtype; // select return type typedef typename unwrapET< @@ -1132,9 +1132,9 @@ class _bz_ArrayExprTernaryOp { typedef T_expr1 T_ctorArg1; typedef T_expr2 T_ctorArg2; typedef T_expr3 T_ctorArg3; - typedef _bz_ArrayExprTernaryOp<_bz_typename P_expr1::T_range_result, - _bz_typename P_expr2::T_range_result, - _bz_typename P_expr3::T_range_result, P_op> T_range_result; + typedef _bz_ArrayExprTernaryOp T_range_result; static const int numArrayOperands = T_expr1::numArrayOperands @@ -1507,11 +1507,11 @@ class _bz_ArrayExprQuaternaryOp { typedef P_expr3 T_expr3; typedef P_expr4 T_expr4; typedef P_op T_op; - typedef _bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; - typedef _bz_typename T_expr3::T_numtype T_numtype3; - typedef _bz_typename T_expr4::T_numtype T_numtype4; - typedef _bz_typename T_op::T_numtype T_numtype; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr3::T_numtype T_numtype3; + typedef typename T_expr4::T_numtype T_numtype4; + typedef typename T_op::T_numtype T_numtype; // select return type typedef typename unwrapET::T_unwrapped T_unwrapped1; @@ -1542,10 +1542,10 @@ class _bz_ArrayExprQuaternaryOp { typedef T_expr2 T_ctorArg2; typedef T_expr3 T_ctorArg3; typedef T_expr4 T_ctorArg4; - typedef _bz_ArrayExprQuaternaryOp<_bz_typename P_expr1::T_range_result, - _bz_typename P_expr2::T_range_result, - _bz_typename P_expr3::T_range_result, - _bz_typename P_expr4::T_range_result, + typedef _bz_ArrayExprQuaternaryOp T_range_result; static const int diff --git 
a/blitz/array/funcs.h b/blitz/array/funcs.h index 628bf966..32953bb5 100644 --- a/blitz/array/funcs.h +++ b/blitz/array/funcs.h @@ -121,13 +121,13 @@ BZ_DECLARE_ARRAY_ET_UNARY(uitrunc, Fn_uitrunc) template _bz_inline_et -_bz_ArrayExpr<_bz_ArrayExprUnaryOp<_bz_typename asExpr::T_expr, - Cast<_bz_typename asExpr::T_expr::T_numtype, T_cast> > > +_bz_ArrayExpr<_bz_ArrayExprUnaryOp::T_expr, + Cast::T_expr::T_numtype, T_cast> > > cast(const ETBase& expr) { return _bz_ArrayExpr<_bz_ArrayExprUnaryOp< - _bz_typename asExpr::T_expr, - Cast<_bz_typename asExpr::T_expr::T_numtype,T_cast> > > + typename asExpr::T_expr, + Cast::T_expr::T_numtype,T_cast> > > (expr.unwrap()); } @@ -211,7 +211,7 @@ pow(const complex d1, const ETBase& d2) // we define a generalized dot product for all classes as sum(a*b) template inline -_bz_typename ReduceSum<_bz_typename blitz::BzBinaryExprResult::T_result::T_numtype +typename ReduceSum::T_result::T_numtype >::T_resulttype dot(const ETBase& d1, const ETBase& d2) { @@ -230,29 +230,29 @@ _bz_ArrayExpr< _bz_ArrayExprReduce< _bz_ArrayExpr< _bz_ArrayExprReduce< - _bz_typename BzBinaryExprResult< + typename BzBinaryExprResult< Multiply, - _bz_typename BzBinaryExprResult< + typename BzBinaryExprResult< Multiply, _bz_ArrayExpr, _bz_ArrayExpr< ArrayIndexMapping< - _bz_typename asExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> + typename asExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result, _bz_ArrayExpr< ArrayIndexMapping< - _bz_typename asExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> + typename asExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result, 2, ReduceSum< - _bz_typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype, - BZ_SUMTYPE(bzCC(_bz_typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > 
::T_result::T_numtype))> + typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype, + BZ_SUMTYPE(bzCC(typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> > >, 1, - ReduceSum,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> + ReduceSum,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> > > @@ -263,29 +263,29 @@ _bz_ArrayExpr< _bz_ArrayExprReduce< _bz_ArrayExpr< _bz_ArrayExprReduce< - _bz_typename BzBinaryExprResult< + typename BzBinaryExprResult< Multiply, - _bz_typename BzBinaryExprResult< + typename BzBinaryExprResult< Multiply, _bz_ArrayExpr, _bz_ArrayExpr< ArrayIndexMapping< - _bz_typename asExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> + typename asExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result, _bz_ArrayExpr< ArrayIndexMapping< - _bz_typename asExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> + typename asExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result, 2, ReduceSum< - _bz_typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype, - BZ_SUMTYPE(bzCC(_bz_typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> + typename BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype, + BZ_SUMTYPE(bzCC(typename 
BzBinaryExprResult,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> > >, 1, - ReduceSum,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> + ReduceSum,_bz_ArrayExpr::T_expr, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > >::T_result,_bz_ArrayExpr::T_expr, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> > > ::T_result::T_numtype))> > >* x; //int a=*x; diff --git a/blitz/array/functorExpr.h b/blitz/array/functorExpr.h index 1d002a7c..755d936c 100644 --- a/blitz/array/functorExpr.h +++ b/blitz/array/functorExpr.h @@ -108,7 +108,7 @@ class _bz_FunctorExpr { public: typedef P_functor T_functor; typedef P_expr T_expr; - typedef _bz_typename T_expr::T_numtype T_numtype1; + typedef typename T_expr::T_numtype T_numtype1; typedef P_result T_numtype; // select return type @@ -124,7 +124,7 @@ class _bz_FunctorExpr { typedef T_expr T_ctorArg1; typedef int T_ctorArg2; // dummy typedef int T_ctorArg3; // dummy - typedef _bz_FunctorExpr T_range_result; static const int @@ -152,7 +152,7 @@ class _bz_FunctorExpr { { } // this is identical to the above constructor - //_bz_FunctorExpr(BZ_ETPARM(T_functor) f, _bz_typename T_expr::T_ctorArg1 a) + //_bz_FunctorExpr(BZ_ETPARM(T_functor) f, typename T_expr::T_ctorArg1 a) //: f_(f), iter_(a) { } #if BZ_TEMPLATE_CTOR_DOESNT_CAUSE_HAVOC @@ -362,8 +362,8 @@ class _bz_FunctorExpr2 typedef P_functor T_functor; typedef P_expr1 T_expr1; typedef P_expr2 T_expr2; - typedef _bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; typedef P_result T_numtype; // select return type @@ -383,8 +383,8 @@ class _bz_FunctorExpr2 typedef T_expr1 T_ctorArg2; typedef int T_ctorArg3; // dummy typedef _bz_FunctorExpr2 T_range_result; static 
const int @@ -706,9 +706,9 @@ class _bz_FunctorExpr3 typedef P_expr1 T_expr1; typedef P_expr2 T_expr2; typedef P_expr3 T_expr3; - typedef _bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; - typedef _bz_typename T_expr3::T_numtype T_numtype3; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr3::T_numtype T_numtype3; typedef P_result T_numtype; // select return type @@ -735,9 +735,9 @@ class _bz_FunctorExpr3 typedef T_expr2 T_ctorArg2; typedef T_expr3 T_ctorArg3; typedef _bz_FunctorExpr3 T_range_result; static const int @@ -1104,53 +1104,53 @@ class _bz_FunctorExpr3 template _bz_inline_et -_bz_ArrayExpr<_bz_FunctorExpr::T_expr, - _bz_typename asExpr::T_expr::T_numtype> > +_bz_ArrayExpr<_bz_FunctorExpr::T_expr, + typename asExpr::T_expr::T_numtype> > applyFunctor(const P_functor& f, const ETBase& a) { typedef _bz_FunctorExpr::T_expr, - _bz_typename asExpr::T_expr::T_numtype> f1; + typename asExpr::T_expr, + typename asExpr::T_expr::T_numtype> f1; return _bz_ArrayExpr(f, a.unwrap()); } template _bz_inline_et _bz_ArrayExpr<_bz_FunctorExpr2::T_expr, - _bz_typename asExpr::T_expr, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype)> > + typename asExpr::T_expr, + typename asExpr::T_expr, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype)> > applyFunctor(const P_functor& f, const ETBase& a, const ETBase& b) { typedef _bz_FunctorExpr2::T_expr, - _bz_typename asExpr::T_expr, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype)> f2; + typename asExpr::T_expr, + typename asExpr::T_expr, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype)> f2; return _bz_ArrayExpr(f, a.unwrap(), b.unwrap()); } template _bz_inline_et _bz_ArrayExpr<_bz_FunctorExpr3::T_expr, - _bz_typename asExpr::T_expr, - _bz_typename 
asExpr::T_expr, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype))> > + typename asExpr::T_expr, + typename asExpr::T_expr, + typename asExpr::T_expr, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype))> > applyFunctor(const P_functor& f, const ETBase& a, const ETBase& b, const ETBase& c) { typedef _bz_FunctorExpr3::T_expr, - _bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - BZ_PROMOTE(_bz_typename asExpr::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype))> f3; + typename asExpr::T_expr, + typename asExpr::T_expr, + typename asExpr::T_expr, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + BZ_PROMOTE(typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype))> f3; return _bz_ArrayExpr(f, a.unwrap(), b.unwrap(), c.unwrap()); } @@ -1249,14 +1249,14 @@ private: \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr::T_optype> > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr::T_optype> > \ operator()(const blitz::ETBase& a) const \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr::T_optype> > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr::T_optype> > \ (*this, a.unwrap()); \ } @@ -1264,22 +1264,22 @@ operator()(const blitz::ETBase& a) const \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype)> > \ operator()(const blitz::ETBase& a, \ const blitz::ETBase& b) const \ { \ return blitz::_bz_ArrayExpr< \ 
blitz::_bz_FunctorExpr2::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype)> > \ (*this, a.unwrap(), b.unwrap()); \ } @@ -1288,14 +1288,14 @@ operator()(const blitz::ETBase& a, \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - BZ_PROMOTE(_bz_typename \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype))> > \ operator()(const blitz::ETBase& a, \ const blitz::ETBase& b, \ @@ -1303,14 +1303,14 @@ operator()(const blitz::ETBase& a, \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr3::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - BZ_PROMOTE(_bz_typename \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype))> >\ (*this, a.unwrap(), b.unwrap(), c.unwrap()); \ } @@ -1320,13 +1320,13 @@ operator()(const blitz::ETBase& a, \ template \ blitz::_bz_ArrayExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ operator()(const blitz::ETBase& a) const \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ (*this, a.unwrap()); \ } @@ -1335,16 +1335,16 @@ operator()(const blitz::ETBase& a) const \ template \ blitz::_bz_ArrayExpr::T_expr, \ - 
_bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ operator()(const blitz::ETBase& a, \ const blitz::ETBase& b) const \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr2::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ (*this, a.unwrap(), b.unwrap()); \ } @@ -1353,9 +1353,9 @@ operator()(const blitz::ETBase& a, \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ operator()(const blitz::ETBase& a, \ const blitz::ETBase& b, \ @@ -1363,9 +1363,9 @@ operator()(const blitz::ETBase& a, \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr3::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ (*this, a.unwrap(), b.unwrap(), c.unwrap()); \ } @@ -1376,15 +1376,15 @@ _BZ_MAKE_FUNCTOR(classname, funcname) \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr::T_optype> > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr::T_optype> > \ funcname(const blitz::ETBase& a) const \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr::T_optype> > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr::T_optype> > \ (_bz_Functor ## classname ## funcname(*this), a.unwrap()); \ } @@ -1393,11 +1393,11 @@ _BZ_MAKE_FUNCTOR2(classname, funcname) \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename 
blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype)> > \ funcname(const blitz::ETBase& a, \ const blitz::ETBase& b) const \ @@ -1405,11 +1405,11 @@ funcname(const blitz::ETBase& a, \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr2< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype)> > \ (_bz_Functor ## classname ## funcname(*this), a.unwrap(), b.unwrap()); \ } @@ -1419,14 +1419,14 @@ _BZ_MAKE_FUNCTOR3(classname, funcname) \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - BZ_PROMOTE(_bz_typename \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype))> > \ funcname(const blitz::ETBase& a, \ const blitz::ETBase& b, \ @@ -1435,14 +1435,14 @@ funcname(const blitz::ETBase& a, \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr3< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - BZ_PROMOTE(_bz_typename \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - BZ_PROMOTE(_bz_typename \ + BZ_PROMOTE(typename \ blitz::asExpr::T_expr::T_optype, \ - _bz_typename \ + typename \ blitz::asExpr::T_expr::T_optype))> >\ (_bz_Functor 
## classname ## funcname(*this), \ a.unwrap(), b.unwrap(), c.unwrap()); \ @@ -1454,14 +1454,14 @@ _BZ_MAKE_FUNCTOR_RET(classname, funcname, ret) \ template \ blitz::_bz_ArrayExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ funcname(const blitz::ETBase& a) const \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, ret> > \ + typename blitz::asExpr::T_expr, ret> > \ (_bz_Functor ## classname ## funcname(*this), a.unwrap()); \ } @@ -1470,8 +1470,8 @@ _BZ_MAKE_FUNCTOR2_RET(classname, funcname, ret) \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ funcname(const blitz::ETBase& a, \ const blitz::ETBase& b) const \ @@ -1479,8 +1479,8 @@ funcname(const blitz::ETBase& a, \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr2< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ (_bz_Functor ## classname ## funcname(*this), a.unwrap(), b.unwrap()); \ } @@ -1490,9 +1490,9 @@ _BZ_MAKE_FUNCTOR3_RET(classname, funcname, ret) \ template \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ funcname(const blitz::ETBase& a, \ const blitz::ETBase& b, \ @@ -1501,9 +1501,9 @@ funcname(const blitz::ETBase& a, \ return blitz::_bz_ArrayExpr< \ blitz::_bz_FunctorExpr3< \ _bz_Functor ## classname ## funcname, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ ret> > \ 
(_bz_Functor ## classname ## funcname(*this), \ a.unwrap(), b.unwrap(), c.unwrap()); \ diff --git a/blitz/array/indirect.h b/blitz/array/indirect.h index ede9899a..a9dd4b7c 100644 --- a/blitz/array/indirect.h +++ b/blitz/array/indirect.h @@ -65,17 +65,17 @@ inline void applyOverSubdomain(const T_array& array, T_arrayiter& arrayIter, template template void IndirectArray::operator=(T_rhs rhs) { - typedef _bz_typename asExpr::T_expr T_expr; + typedef typename asExpr::T_expr T_expr; T_expr expr(rhs); - _bz_typename T_array::T_iterator arrayIter(array_); + typename T_array::T_iterator arrayIter(array_); - _bz_typename T_index::iterator iter = index_.begin(), + typename T_index::iterator iter = index_.begin(), end = index_.end(); for (; iter != end; ++iter) { - _bz_typename T_index::value_type subdomain = *iter; + typename T_index::value_type subdomain = *iter; applyOverSubdomain(array_, arrayIter, subdomain, expr); } } @@ -94,7 +94,7 @@ inline void applyOverSubdomain(const T_array& BZ_DEBUG_PARAM(array), T_arrayiter arrayIter.moveTo(subdomain); expr.moveTo(subdomain); - *const_cast<_bz_typename T_arrayiter::T_numtype*>(arrayIter.data()) = *expr; + *const_cast(arrayIter.data()) = *expr; } // Specialization for RectDomain @@ -103,7 +103,7 @@ inline void applyOverSubdomain(const T_array& BZ_DEBUG_PARAM(array), T_arrayiter RectDomain subdomain, T_expr expr) { - typedef _bz_typename T_array::T_numtype T_numtype; + typedef typename T_array::T_numtype T_numtype; // Assume that the RectDomain is a 1-D strip. // Find the dimension in which the strip is oriented. 
This @@ -182,7 +182,7 @@ inline void applyOverSubdomain(const T_array& BZ_DEBUG_PARAM(array), T_arrayiter for (int i=lbound; i <= ubound; ++i) { - *const_cast<_bz_typename T_arrayiter::T_numtype*>(arrayIter.data()) + *const_cast(arrayIter.data()) = *expr; expr.advance(); arrayIter.advance(); @@ -375,7 +375,7 @@ struct cp_findContainerType { template struct cp_findContainerType { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; }; @@ -415,11 +415,11 @@ struct cp_traits { // specified. template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container,2> indexSet(const T1& c1, const T2& c2) { - typedef _bz_typename cp_findContainerType::T_container + typedef typename cp_findContainerType::T_container T_container; return CartesianProduct, T_container, 2>( @@ -428,11 +428,11 @@ indexSet(const T1& c1, const T2& c2) } template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 3> indexSet(const T1& c1, const T2& c2, const T3& c3) { - typedef _bz_typename cp_findContainerType::T_container + typedef typename cp_findContainerType::T_container T_container; return CartesianProduct, T_container, 3>( @@ -442,11 +442,11 @@ indexSet(const T1& c1, const T2& c2, const T3& c3) } template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 4> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4) { - typedef _bz_typename cp_findContainerType::T_container + typedef typename cp_findContainerType::T_container T_container; return CartesianProduct, T_container, 4>( @@ -457,11 +457,11 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4) } template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 5> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5) { - typedef _bz_typename cp_findContainerType::T_container + typedef typename 
cp_findContainerType::T_container T_container; return CartesianProduct, T_container, 5>( @@ -474,12 +474,12 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5) template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 6> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6) { - typedef _bz_typename cp_findContainerType::T_container + typedef typename cp_findContainerType::T_container T_container; return CartesianProduct, T_container, 6>( @@ -493,12 +493,12 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 7> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6, const T7& c7) { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; @@ -514,12 +514,12 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 8> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6, const T7& c7, const T8& c8) { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; @@ -536,12 +536,12 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 9> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6, const T7& c7, const T8& c8, const T9& c9) { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; @@ -559,12 +559,12 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, template -CartesianProduct, _bz_typename +CartesianProduct, typename 
cp_findContainerType::T_container, 10> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6, const T7& c7, const T8& c8, const T9& c9, const T10& c10) { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; @@ -584,13 +584,13 @@ indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, template -CartesianProduct, _bz_typename +CartesianProduct, typename cp_findContainerType::T_container, 11> indexSet(const T1& c1, const T2& c2, const T3& c3, const T4& c4, const T5& c5, const T6& c6, const T7& c7, const T8& c8, const T9& c9, const T10& c10, const T11& c11) { - typedef _bz_typename + typedef typename cp_findContainerType::T_container T_container; diff --git a/blitz/array/map.h b/blitz/array/map.h index a6a4195e..f619e3fc 100644 --- a/blitz/array/map.h +++ b/blitz/array/map.h @@ -436,7 +436,7 @@ class ArrayIndexMapping { { } // this is ambiguous with the above - // ArrayIndexMapping(_bz_typename T_expr::T_ctorArg1 a) + // ArrayIndexMapping(typename T_expr::T_ctorArg1 a) // : iter_(a) // { } diff --git a/blitz/array/methods.cc b/blitz/array/methods.cc index 541cbdd4..44fbe365 100644 --- a/blitz/array/methods.cc +++ b/blitz/array/methods.cc @@ -354,27 +354,36 @@ void Array::makeUnique() } template -Array Array::transpose(int r0, int r1, - int r2, int r3, int r4, int r5, int r6, int r7, int r8, int r9, int r10) const +Array Array::transpose(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,int r)) const { T_array B(*this); - B.transposeSelf(r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10); + B.transposeSelf(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,r)); return B; } template -void Array::transposeSelf(int r0, int r1, int r2, int r3, - int r4, int r5, int r6, int r7, int r8, int r9, int r10) +void Array::transposeSelf(BOOST_PP_ENUM_PARAMS(BLITZ_ARRAY_LARGEST_RANK,int r)) { - BZPRECHECK(r0+r1+r2+r3+r4+r5+r6+r7+r8+r9+r10 == N_rank * (N_rank-1) / 2, +#define DEFAULT_print(z, n, data) +r##n + + 
BZPRECHECK(r0+BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) == N_rank * (N_rank-1) / 2, "Invalid array transpose() arguments." << endl << "Arguments must be a permutation of the numerals (0,...," << (N_rank - 1) << ")"); +#undef DEFAULT_print + // Create a temporary reference copy of this array Array x(*this); // Now reorder the dimensions using the supplied permutation +#define DEFAULT_print(z, n, data) doTranspose(n, r##n, x); + + BOOST_PP_REPEAT(BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) + +#undef DEFAULT_print + + doTranspose(0, r0, x); doTranspose(1, r1, x); doTranspose(2, r2, x); diff --git a/blitz/array/misc.cc b/blitz/array/misc.cc index 2d3ee843..eabae8d3 100644 --- a/blitz/array/misc.cc +++ b/blitz/array/misc.cc @@ -50,11 +50,11 @@ fn(const Array& array) \ template \ inline \ _bz_ArrayExpr<_bz_ArrayExprUnaryOp<_bz_ArrayExpr, \ - fnobj<_bz_typename T_expr::T_numtype> > > \ + fnobj > > \ fn(BZ_ETPARM(_bz_ArrayExpr) expr) \ { \ return _bz_ArrayExprUnaryOp<_bz_ArrayExpr, \ - fnobj<_bz_typename T_expr::T_numtype> >(expr); \ + fnobj >(expr); \ } BZ_ARRAY_DECLARE_UOP(operator!, LogicalNot) @@ -78,11 +78,11 @@ cast(const Array& array, T_cast) template inline _bz_ArrayExpr<_bz_ArrayExprUnaryOp<_bz_ArrayExpr, - Cast<_bz_typename T_expr::T_numtype,T_cast> > > + Cast > > cast(BZ_ETPARM(_bz_ArrayExpr) expr, T_cast) { return _bz_ArrayExprUnaryOp<_bz_ArrayExpr, - Cast<_bz_typename T_expr::T_numtype,T_cast> >(expr); + Cast >(expr); } } diff --git a/blitz/array/newet-macros.h b/blitz/array/newet-macros.h index d5ddd58e..294bfad7 100644 --- a/blitz/array/newet-macros.h +++ b/blitz/array/newet-macros.h @@ -194,14 +194,14 @@ name(const blitz::ETBase& d1, \ template \ _bz_inline_et \ blitz::_bz_ArrayExpr::T_expr, \ - functor<_bz_typename blitz::asExpr::T_expr::T_numtype> > >\ + typename blitz::asExpr::T_expr, \ + functor::T_expr::T_numtype> > >\ name(const blitz::ETBase& d1) \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_ArrayExprUnaryOp< \ - 
_bz_typename blitz::asExpr::T_expr, \ - functor<_bz_typename \ + typename blitz::asExpr::T_expr, \ + functor::T_expr::T_numtype> > >( \ blitz::asExpr::getExpr(d1.unwrap())); \ } @@ -218,19 +218,19 @@ name(const blitz::ETBase& d1) \ template \ _bz_inline_et \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> > > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + applic::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype> > > \ name(const blitz::ETBase& d1, \ const blitz::ETBase& d2) \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> > >( \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + applic::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype> > >( \ blitz::asExpr::getExpr(d1.unwrap()), \ blitz::asExpr::getExpr(d2.unwrap())); \ } @@ -240,20 +240,20 @@ name(const blitz::ETBase& d1, \ template \ _bz_inline_et \ blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr< \ + typename blitz::asExpr< \ blitz::TinyVector >::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ applic, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> > \ + typename blitz::asExpr::T_expr::T_numtype> > \ name(const blitz::TinyVector d1, \ const blitz::ETBase& d2) \ { \ return blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr< \ + typename blitz::asExpr< \ blitz::TinyVector >::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ applic, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> >( \ + typename blitz::asExpr::T_expr::T_numtype> >( \ blitz::asExpr< \ blitz::TinyVector >::getExpr(d1), \ 
blitz::asExpr::getExpr(d2.unwrap())); \ @@ -262,19 +262,19 @@ name(const blitz::TinyVector d1, \ template \ _bz_inline_et \ blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr< \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr< \ blitz::TinyVector >::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ + applic::T_expr::T_numtype, \ blitz::TinyVector > > \ name(const blitz::ETBase& d1, \ const blitz::TinyVector d2) \ { \ return blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr< \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr< \ blitz::TinyVector >::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ + applic::T_expr::T_numtype, \ blitz::TinyVector > >( \ blitz::asExpr::getExpr(d1.unwrap()), \ blitz::asExpr< \ @@ -287,15 +287,15 @@ template \ _bz_inline_et \ blitz::_bz_ArrayExprBinaryOp< \ blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - applic::T_expr::T_numtype> >\ + typename blitz::asExpr::T_expr, \ + applic::T_expr::T_numtype> >\ name(const sca d1, const blitz::ETBase& d2) \ { \ return blitz::_bz_ArrayExprBinaryOp< \ blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ applic::T_expr::T_numtype> >( \ + typename blitz::asExpr::T_expr::T_numtype> >( \ blitz::asExpr::getExpr(d1), \ blitz::asExpr::getExpr(d2.unwrap())); \ } \ @@ -303,16 +303,16 @@ name(const sca d1, const blitz::ETBase& d2) \ template \ _bz_inline_et \ blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ blitz::asExpr::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ + applic::T_expr::T_numtype, \ sca > > \ name(const blitz::ETBase& d1, const sca d2) \ { \ return blitz::_bz_ArrayExprBinaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ blitz::asExpr::T_expr, \ - 
applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ + applic::T_expr::T_numtype, \ sca > >( \ blitz::asExpr::getExpr(d1.unwrap()), \ blitz::asExpr::getExpr(d2)); \ @@ -330,24 +330,24 @@ name(const blitz::ETBase& d1, const sca d2) \ template \ _bz_inline_et \ blitz::_bz_ArrayExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> > > \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + applic::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype> > > \ name(const blitz::ETBase& d1, \ const blitz::ETBase& d2, \ const blitz::ETBase& d3) \ { \ return blitz::_bz_ArrayExpr< \ blitz::_bz_ArrayExprTernaryOp< \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - applic<_bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype> > >( \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + applic::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype> > >( \ blitz::asExpr::getExpr(d1.unwrap()), \ blitz::asExpr::getExpr(d2.unwrap()), \ blitz::asExpr::getExpr(d3.unwrap())); \ @@ -369,15 +369,15 @@ name(const blitz::ETBase& d1, \ < \ blitz::_bz_ArrayExprQuaternaryOp \ < \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ functor< \ - _bz_typename 
blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype \ > > > \ \ name(const blitz::ETBase& d1, \ @@ -389,14 +389,14 @@ name(const blitz::ETBase& d1, \ < \ blitz::_bz_ArrayExprBinaryOp \ < \ - _bz_typename blitz::asExpr::T_expr, \ - _bz_typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ + typename blitz::asExpr::T_expr, \ functor \ < \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype, \ - _bz_typename blitz::asExpr::T_expr::T_numtype \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype, \ + typename blitz::asExpr::T_expr::T_numtype \ > > > \ ( \ blitz::asExpr::getExpr(d1.unwrap()), \ diff --git a/blitz/array/ops.cc b/blitz/array/ops.cc index f42ad4bb..f8d0f032 100644 --- a/blitz/array/ops.cc +++ b/blitz/array/ops.cc @@ -64,9 +64,9 @@ _bz_forceinline Array& Array::operator=(const ETBase& expr) { - _bz_evaluate(*this, _bz_typename asExpr::T_expr(expr.unwrap()), + _bz_evaluate(*this, typename asExpr::T_expr(expr.unwrap()), _bz_update::T_expr::T_result>()); + typename asExpr::T_expr::T_result>()); return *this; } @@ -79,7 +79,7 @@ Array::operator=(const Array& x) typedef typename asExpr >::T_expr T_expr; _bz_evaluate(*this, asExpr >::getExpr(x), _bz_update()); + typename T_expr::T_result>()); return *this; } @@ -90,8 +90,8 @@ Array::operator=(const Array& x) Array& \ Array::operator op(const ETBase& expr) \ { \ - _bz_evaluate(*this, _bz_typename asExpr::T_expr(expr.unwrap()), \ - name::T_expr::T_result>()); \ + _bz_evaluate(*this, typename 
asExpr::T_expr(expr.unwrap()), \ + name::T_expr::T_result>()); \ return *this; \ } \ template \ @@ -101,7 +101,7 @@ Array::operator=(const Array& x) { \ typedef typename asExpr >::T_expr T_expr; \ _bz_evaluate(*this, asExpr >::getExpr(x), \ - name()); \ + name()); \ return *this; \ } \ template \ @@ -111,7 +111,7 @@ Array::operator=(const Array& x) { \ typedef typename asExpr::T_expr T_expr; \ _bz_evaluate(*this, asExpr::getExpr(x), \ - name()); \ + name()); \ return *this; \ } @@ -316,7 +316,7 @@ template template inline Array& Array::operator=(BZ_ETPARM(_bz_ArrayExpr) expr) { - evaluate(expr, _bz_update()); + evaluate(expr, _bz_update()); return *this; } @@ -324,7 +324,7 @@ template template inline Array& Array::operator+=(BZ_ETPARM(_bz_ArrayExpr) expr) { - evaluate(expr, _bz_plus_update()); + evaluate(expr, _bz_plus_update()); return *this; } @@ -333,7 +333,7 @@ inline Array& Array::operator-=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_minus_update()); + typename T_expr::T_numtype>()); return *this; } @@ -342,7 +342,7 @@ inline Array& Array::operator*=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_multiply_update()); + typename T_expr::T_numtype>()); return *this; } @@ -351,7 +351,7 @@ inline Array& Array::operator/=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_divide_update()); + typename T_expr::T_numtype>()); return *this; } @@ -360,7 +360,7 @@ inline Array& Array::operator%=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_mod_update()); + typename T_expr::T_numtype>()); return *this; } @@ -369,7 +369,7 @@ inline Array& Array::operator^=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_xor_update()); + typename T_expr::T_numtype>()); return *this; } @@ -378,7 +378,7 @@ inline Array& Array::operator&=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_bitand_update()); + typename T_expr::T_numtype>()); return *this; } @@ -387,7 +387,7 @@ inline Array& Array::operator|=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, 
_bz_bitor_update()); + typename T_expr::T_numtype>()); return *this; } @@ -396,7 +396,7 @@ inline Array& Array::operator>>=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_shiftr_update()); + typename T_expr::T_numtype>()); return *this; } @@ -405,7 +405,7 @@ inline Array& Array::operator<<=(BZ_ETPARM(_bz_ArrayExpr) expr) { evaluate(expr, _bz_shiftl_update()); + typename T_expr::T_numtype>()); return *this; } diff --git a/blitz/array/reduce.cc b/blitz/array/reduce.cc index b81c752c..4afc0406 100644 --- a/blitz/array/reduce.cc +++ b/blitz/array/reduce.cc @@ -34,7 +34,7 @@ namespace blitz { template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_ArrayExprFullReduce(T_expr expr, T_reduction reduction) { #ifdef BZ_TAU_PROFILING @@ -86,7 +86,7 @@ struct _bz_IndexingVariant > { }; template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_reduceWithIndexTraversalGeneric(T_expr expr, T_reduction reduction) { // This is optimized assuming C-style arrays. @@ -134,7 +134,7 @@ _bz_reduceWithIndexTraversalGeneric(T_expr expr, T_reduction reduction) } template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_reduceWithIndexTraversal(T_expr expr, T_reduction reduction) { return _bz_reduceWithIndexTraversalGeneric(expr,reduction); @@ -143,7 +143,7 @@ _bz_reduceWithIndexTraversal(T_expr expr, T_reduction reduction) // This version is for reductions that require a vector of index positions. 
template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_reduceWithIndexVectorTraversal(T_expr expr, T_reduction reduction) { // We are doing minIndex/maxIndex, so initialize with lower bound diff --git a/blitz/array/reduce.h b/blitz/array/reduce.h index 3a687364..700e59c1 100644 --- a/blitz/array/reduce.h +++ b/blitz/array/reduce.h @@ -79,7 +79,7 @@ template class _bz_ArrayExprReduce { public: - typedef _bz_typename T_reduction::T_numtype T_numtype; + typedef typename T_reduction::T_numtype T_numtype; // select return type typedef typename unwrapET::T_unwrapped test; @@ -312,15 +312,15 @@ class _bz_ArrayExprReduce { #define BZ_DECL_ARRAY_PARTIAL_REDUCE(fn,reduction) \ template \ inline \ - _bz_ArrayExpr<_bz_ArrayExprReduce<_bz_typename blitz::asExpr::T_expr, \ + _bz_ArrayExpr<_bz_ArrayExprReduce::T_expr, \ N_index, \ - reduction<_bz_typename T_expr::T_numtype> > > \ + reduction > > \ fn(const blitz::ETBase& expr, \ const IndexPlaceholder&) \ { \ - return _bz_ArrayExprReduce<_bz_typename blitz::asExpr::T_expr, \ + return _bz_ArrayExprReduce::T_expr, \ N_index, \ - reduction<_bz_typename T_expr::T_numtype> > \ + reduction > \ (blitz::asExpr::getExpr(expr.unwrap())); \ } @@ -343,26 +343,26 @@ BZ_DECL_ARRAY_PARTIAL_REDUCE(last, ReduceLast) // Prototype of reduction functions template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_ArrayExprFullReduce(T_expr expr, T_reduction reduction); template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_reduceWithIndexTraversal(T_expr expr, T_reduction reduction); template -_bz_typename T_reduction::T_resulttype +typename T_reduction::T_resulttype _bz_reduceWithIndexVectorTraversal(T_expr expr, T_reduction reduction); #define BZ_DECL_ARRAY_FULL_REDUCE(fn,reduction) \ template \ _bz_inline_et \ -_bz_typename reduction<_bz_typename T_expr::T_numtype>::T_resulttype \ +typename reduction::T_resulttype \ fn(const blitz::ETBase& expr) \ { \ 
return _bz_ArrayExprFullReduce \ (blitz::asExpr::getExpr(expr.unwrap()), \ - reduction<_bz_typename T_expr::T_numtype>()); \ + reduction()); \ } \ BZ_DECL_ARRAY_FULL_REDUCE(sum, ReduceSum) @@ -383,13 +383,13 @@ BZ_DECL_ARRAY_FULL_REDUCE(last, ReduceLast) #define BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(fn,reduction) \ template \ _bz_inline_et \ - _bz_typename reduction<_bz_typename T_expr::T_numtype, \ + typename reduction::T_resulttype \ fn(const blitz::ETBase& expr) \ { \ return _bz_reduceWithIndexVectorTraversal \ (blitz::asExpr::getExpr(expr.unwrap()), \ - reduction<_bz_typename T_expr::T_numtype, T_expr::rank_>()); \ + reduction()); \ } BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(minIndex, ReduceMinIndexVector) diff --git a/blitz/array/slice.h b/blitz/array/slice.h index 2c0d602b..22ae39b1 100644 --- a/blitz/array/slice.h +++ b/blitz/array/slice.h @@ -37,6 +37,13 @@ #include + +#include +#include +#include +#include + + namespace blitz { // Forward declarations @@ -77,52 +84,30 @@ class ArraySectionInfo { static const int isPick = 0; }; -template + +// #define NUMBER_OF_PARAMS BOOST_PP_SUB(BLITZ_ARRAY_LARGEST_RANK,1) + +template class SliceInfo { public: - static const int - numValidTypes = ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType - + ArraySectionInfo::isValidType; +#define DEFAULT_print(z, n, data) + ArraySectionInfo::isValidType + + static const int + numValidTypes = ArraySectionInfo::isValidType BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print +#define DEFAULT_print(z, n, data) + ArraySectionInfo::rank static const int - rank = ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + 
ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank - + ArraySectionInfo::rank; + rank = ArraySectionInfo::rank BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print +#define DEFAULT_print(z, n, data) + ArraySectionInfo::isPick static const int - isPick = ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick - + ArraySectionInfo::isPick; + isPick = ArraySectionInfo::isPick BOOST_PP_REPEAT_FROM_TO(1,BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~); + +#undef DEFAULT_print typedef Array T_array; typedef Array T_slice; diff --git a/blitz/array/slicing.cc b/blitz/array/slicing.cc index 0f6036f6..79ae63a1 100644 --- a/blitz/array/slicing.cc +++ b/blitz/array/slicing.cc @@ -30,6 +30,8 @@ #ifndef BZ_ARRAYSLICING_CC #define BZ_ARRAYSLICING_CC +#include + #ifndef BZ_ARRAY_H #error must be included via #endif @@ -214,12 +216,8 @@ void Array::constructSubarray( * combination of int and Range parameters. There's room for up * to 11 parameters, but any unused parameters have no effect. 
*/ -template template -void Array::constructSlice(Array& array, - R0 r0, R1 r1, R2 r2, R3 r3, R4 r4, R5 r5, R6 r6, R7 r7, R8 r8, R9 r9, - R10 r10) +template template +void Array::constructSlice(Array& array, BOOST_PP_ENUM_BINARY_PARAMS(BLITZ_ARRAY_LARGEST_RANK,R,r)) { MemoryBlockReference::changeBlock(array); @@ -227,17 +225,11 @@ void Array::constructSlice(Array& array, TinyVector rankMap; - slice(setRank, r0, array, rankMap, 0); - slice(setRank, r1, array, rankMap, 1); - slice(setRank, r2, array, rankMap, 2); - slice(setRank, r3, array, rankMap, 3); - slice(setRank, r4, array, rankMap, 4); - slice(setRank, r5, array, rankMap, 5); - slice(setRank, r6, array, rankMap, 6); - slice(setRank, r7, array, rankMap, 7); - slice(setRank, r8, array, rankMap, 8); - slice(setRank, r9, array, rankMap, 9); - slice(setRank, r10, array, rankMap, 10); +#define DEFAULT_print(z, n, data) slice(setRank, r##n, array, rankMap, n); + + BOOST_PP_REPEAT(BLITZ_ARRAY_LARGEST_RANK,DEFAULT_print,~) + +#undef DEFAULT_print // Redo the ordering_ array to account for dimensions which // have been sliced away. diff --git a/blitz/array/stencil-et-macros.h b/blitz/array/stencil-et-macros.h index 77358d81..90fffb60 100644 --- a/blitz/array/stencil-et-macros.h +++ b/blitz/array/stencil-et-macros.h @@ -51,11 +51,11 @@ namespace blitz { this slightly less painful for the majority of the stencil classes. */ #define BZ_ET_STENCIL_REDIRECT(name) \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(const Array& d1) \ { return name(d1.wrap()); } \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(Array& d1) \ { return name(d1.wrap()); } @@ -69,13 +69,13 @@ namespace blitz { differently. 
*/ #define BZ_ET_STENCIL(name,result, etresult, MINB, MAXB) \ - template \ + template \ class name ## _et : public _bz_StencilExpr \ { \ public: \ typedef _bz_StencilExpr T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* if P_numtype is an ET-type, we need to return an expr */ \ typedef typename selectET::Type,T_numtype> Type; \ }; \ \ - typedef name ## _et<_bz_typename P_expr::T_range_result, T_numtype> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -102,7 +102,7 @@ namespace blitz { _bz_StencilExpr(a) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \ + name ## _et(typename T_expr::T_ctorArg1 a) : \ _bz_StencilExpr(a) \ { } \ \ @@ -226,14 +226,14 @@ namespace blitz { differently. */ #define BZ_ET_STENCIL2(name,result, etresult, MINB, MAXB) \ - template \ + template \ class name ## _et2 : public _bz_StencilExpr2 \ { \ public: \ typedef _bz_StencilExpr2 T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr1 T_expr1; \ - typedef _bz_typename T_base::T_expr2 T_expr2; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr1 T_expr1; \ + typedef typename T_base::T_expr2 T_expr2; \ \ /* if P_numtype is an ET-type, we need to return an expr */ \ typedef typename selectET::Type,typename T_expr2::template tvresult::Type,T_numtype> Type; \ }; \ \ - typedef name ## _et2<_bz_typename P_expr1::T_range_result, _bz_typename P_expr2::T_range_result, T_numtype> T_range_result; \ + typedef name ## _et2 T_range_result; \ \ using T_base::iter1_; \ using T_base::iter2_; \ @@ -261,14 +261,14 @@ namespace blitz { _bz_StencilExpr2(a, b) \ { } \ /* \ - name ## _et2(_bz_typename T_expr::T_ctorArg1 a) : \ + name ## _et2(typename T_expr::T_ctorArg1 a) : \ _bz_StencilExpr2(a) \ { } \ 
*/ \ T_result operator*() const \ { return name ## _stencilop(iter1_, iter2_); } \ \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter1_.moveTo(i); iter2_.moveTo(i); return name ## _stencilop(iter1_, iter2_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -416,12 +416,12 @@ namespace blitz { #define BZ_ET_STENCILM(name,result_rank, MINB, MAXB) \ template \ - class name ## _et : public _bz_StencilExpr::T_element, result_rank, result_rank> > \ + class name ## _et : public _bz_StencilExpr::T_element, result_rank, result_rank> > \ { \ public: \ - typedef _bz_StencilExpr::T_element, result_rank, result_rank> > T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef _bz_StencilExpr::T_element, result_rank, result_rank> > T_base; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* there is no return type selection, as we are returning a \ TinyMatrix. 
This must be returned as a FastTMCopyIterator since the \ @@ -435,7 +435,7 @@ namespace blitz { typedef name ## _et::Type> Type; \ }; \ \ - typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -448,13 +448,13 @@ namespace blitz { _bz_StencilExpr(a) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \ + name ## _et(typename T_expr::T_ctorArg1 a) : \ _bz_StencilExpr(a) \ { } \ \ T_result operator*() const \ { return name ## _stencilop(iter_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -534,10 +534,10 @@ namespace blitz { }; \ /* create ET from application to expression */ \ template \ - inline _bz_ArrayExpr::T_expr::T_range_result> > \ + inline _bz_ArrayExpr::T_expr::T_range_result> > \ name(const blitz::ETBase& d1) \ { \ - return _bz_ArrayExpr::T_expr::T_range_result> > \ + return _bz_ArrayExpr::T_expr::T_range_result> > \ (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \ } \ BZ_ET_STENCIL_REDIRECT(name) \ @@ -553,8 +553,8 @@ namespace blitz { { \ public: \ typedef _bz_StencilExpr > T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* there is no return type selection, we assume P_numtype is scalar \ and that we are returning a TinyVector. 
This needs to be returned \ @@ -570,7 +570,7 @@ namespace blitz { typedef name ## _et::Type> Type; \ }; \ \ - typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -583,13 +583,13 @@ namespace blitz { _bz_StencilExpr(a) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \ + name ## _et(typename T_expr::T_ctorArg1 a) : \ _bz_StencilExpr(a) \ { } \ \ T_result operator*() const \ { return name ## _stencilop(iter_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -669,10 +669,10 @@ namespace blitz { }; \ /* create ET from application to expression */ \ template \ - inline _bz_ArrayExpr::T_expr::T_range_result> > \ + inline _bz_ArrayExpr::T_expr::T_range_result> > \ name(const blitz::ETBase& d1) \ { \ - return _bz_ArrayExpr::T_expr::T_range_result> > \ + return _bz_ArrayExpr::T_expr::T_range_result> > \ (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \ } \ BZ_ET_STENCIL_REDIRECT(name) @@ -684,13 +684,13 @@ namespace blitz { #define BZ_ET_STENCIL_SCA(name, MINB, MAXB) \ template \ - class name ## _et : public _bz_StencilExpr::T_element> \ + class name ## _et : public _bz_StencilExpr::T_element> \ { \ public: \ - typedef _bz_typename multicomponent_traits::T_element T_result; \ + typedef typename multicomponent_traits::T_element T_result; \ typedef _bz_StencilExpr T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* there is no selecting return type here. 
because we *know* it is \ scalar T_result, there's no question of whether we could be doing \ @@ -703,7 +703,7 @@ namespace blitz { typedef name ## _et::Type> Type; \ }; \ \ - typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -716,13 +716,13 @@ namespace blitz { _bz_StencilExpr(a) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \ + name ## _et(typename T_expr::T_ctorArg1 a) : \ _bz_StencilExpr(a) \ { } \ \ T_result operator*() const \ { return name ## _stencilop(iter_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -802,10 +802,10 @@ namespace blitz { }; \ /* create ET from application to expression */ \ template \ - inline _bz_ArrayExpr::T_expr::T_range_result> > \ + inline _bz_ArrayExpr::T_expr::T_range_result> > \ name(const blitz::ETBase& d1) \ { \ - return _bz_ArrayExpr::T_expr::T_range_result> > \ + return _bz_ArrayExpr::T_expr::T_range_result> > \ (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \ } \ BZ_ET_STENCIL_REDIRECT(name) @@ -822,12 +822,12 @@ namespace blitz { #define BZ_ET_STENCIL_DIFF(name, MINB, MAXB) \ template \ class name ## _et : \ - public _bz_StencilExpr \ + public _bz_StencilExpr \ { \ public: \ - typedef _bz_StencilExpr T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef _bz_StencilExpr T_base; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* select return type */ \ typedef typename unwrapET::T_unwrapped test; \ @@ -842,7 +842,7 @@ namespace blitz { typedef name ## _et::Type> Type; \ }; \ \ - typedef name ## _et<_bz_typename 
P_expr::T_range_result> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -855,13 +855,13 @@ namespace blitz { _bz_StencilExpr(a), dim_(dim) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a, int dim) : \ + name ## _et(typename T_expr::T_ctorArg1 a, int dim) : \ _bz_StencilExpr(a), dim_(dim) \ { } \ \ T_result operator*() const \ { return name ## _stencilop(iter_, dim_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_, dim_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -954,11 +954,11 @@ namespace blitz { } \ /* forward operations on arrays to main function */ \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(const Array& d1, int dim) \ { return name(d1.wrap(), dim); } \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(Array& d1, int dim) \ { return name(d1.wrap(), dim); } @@ -969,13 +969,13 @@ namespace blitz { #define BZ_ET_STENCIL_MULTIDIFF(name, MINB, MAXB) \ template \ - class name ## _et_multi : public _bz_StencilExpr::T_element> \ + class name ## _et_multi : public _bz_StencilExpr::T_element> \ { \ public: \ - typedef _bz_typename multicomponent_traits::T_element T_result; \ + typedef typename multicomponent_traits::T_element T_result; \ typedef _bz_StencilExpr T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* there is no selecting return type here. 
because we *know* it is \ T_result, there's no question of whether we could be doing \ @@ -988,7 +988,7 @@ namespace blitz { typedef name ## _et_multi::Type> Type; \ }; \ \ - typedef name ## _et_multi<_bz_typename P_expr::T_range_result> T_range_result; \ + typedef name ## _et_multi T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -1002,14 +1002,14 @@ namespace blitz { comp_(comp), dim_(dim) \ { } \ \ - name ## _et_multi(_bz_typename T_expr::T_ctorArg1 a, int comp, int dim) : \ + name ## _et_multi(typename T_expr::T_ctorArg1 a, int comp, int dim) : \ _bz_StencilExpr(a), \ comp_(comp), dim_(dim) \ { } \ \ T_result operator*() const \ { return name ## _stencilop(iter_, comp_, dim_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_, comp_, dim_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -1103,12 +1103,12 @@ namespace blitz { } \ /* forward operations on arrays to main function */ \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(const Array& d1, int comp, int dim) \ { return name(d1.wrap(), comp, dim); } \ \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(Array& d1, int comp, int dim) \ { return name(d1.wrap(), comp, dim); } @@ -1121,12 +1121,12 @@ namespace blitz { #define BZ_ET_STENCIL_DIFF2(name, MINB1, MAXB1, MINB2, MAXB2) \ template \ - class name ## _et : public _bz_StencilExpr \ + class name ## _et : public _bz_StencilExpr \ { \ public: \ - typedef _bz_StencilExpr T_base; \ - typedef _bz_typename T_base::T_numtype T_numtype; \ - typedef _bz_typename T_base::T_expr T_expr; \ + typedef _bz_StencilExpr T_base; \ + typedef typename T_base::T_numtype T_numtype; \ + typedef typename T_base::T_expr T_expr; \ \ /* select return type 
*/ \ typedef typename unwrapET::T_unwrapped test; \ @@ -1141,7 +1141,7 @@ namespace blitz { typedef name ## _et::Type> Type; \ }; \ \ - typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ + typedef name ## _et T_range_result; \ \ using T_base::iter_; \ using T_base::rank_; \ @@ -1156,7 +1156,7 @@ namespace blitz { dim1_(dim1), dim2_(dim2) \ { } \ \ - name ## _et(_bz_typename T_expr::T_ctorArg1 a, \ + name ## _et(typename T_expr::T_ctorArg1 a, \ int dim1, int dim2) : \ _bz_StencilExpr(a), \ dim1_(dim1), dim2_(dim2) \ @@ -1164,7 +1164,7 @@ namespace blitz { \ T_result operator*() const \ { return name ## _stencilop(iter_, dim1_, dim2_); } \ - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const \ + T_result operator()(typename _bz_IndexParameter >::type i) const \ { iter_.moveTo(i); return name ## _stencilop(iter_, dim1_, dim2_); } \ \ T_range_result operator()(const RectDomain& d) const \ @@ -1259,12 +1259,12 @@ template \ } \ /* forward operations on arrays to main function */ \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(const Array& d1, int dim1, int dim2) \ { return name(d1.wrap(), dim1, dim2); } \ \ template \ - inline _bz_ArrayExpr >::T_expr::T_range_result> > \ + inline _bz_ArrayExpr >::T_expr::T_range_result> > \ name(Array& d1, int dim1, int dim2) \ { return name(d1.wrap(), dim1, dim2); } diff --git a/blitz/array/stencil-et.h b/blitz/array/stencil-et.h index 43053be1..de6f9d2d 100644 --- a/blitz/array/stencil-et.h +++ b/blitz/array/stencil-et.h @@ -95,7 +95,7 @@ template struct _bz_IndexParameter { the general ETBase function would work. */ /** ET base class for applying a stencil to an expression. 
*/ -template +template class _bz_StencilExpr { public: typedef P_expr T_expr; @@ -121,7 +121,7 @@ class _bz_StencilExpr { : iter_(a) { } - _bz_StencilExpr(_bz_typename T_expr::T_ctorArg1 a) + _bz_StencilExpr(typename T_expr::T_ctorArg1 a) : iter_(a) { } @@ -234,13 +234,13 @@ class _bz_StencilExpr { }; /** ET base class for applying a stencil to an expression. */ -template +template class _bz_StencilExpr2 { public: typedef P_expr1 T_expr1; typedef P_expr2 T_expr2; - typedef _bz_typename T_expr1::T_numtype T_numtype1; - typedef _bz_typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr1::T_numtype T_numtype1; + typedef typename T_expr2::T_numtype T_numtype2; typedef P_result T_numtype; typedef T_expr1 T_ctorArg1; typedef T_expr2 T_ctorArg2; @@ -267,7 +267,7 @@ class _bz_StencilExpr2 { /* // what is this for? - _bz_StencilExpr2(_bz_typename T_expr1::T_ctorArg1 a) + _bz_StencilExpr2(typename T_expr1::T_ctorArg1 a) : iter_(a) { } */ diff --git a/blitz/array/stencilops.h b/blitz/array/stencilops.h index 9e4dd29f..ee2d6567 100644 --- a/blitz/array/stencilops.h +++ b/blitz/array/stencilops.h @@ -49,23 +49,23 @@ namespace blitz { operators are defined with _stencilop appended to their name. 
*/ #define BZ_DECLARE_STENCIL_OPERATOR1(name,A) \ template \ - inline _bz_typename T::T_numtype name ## _stencilop(const T& A) \ + inline typename T::T_numtype name ## _stencilop(const T& A) \ { #define BZ_END_STENCIL_OPERATOR } #define BZ_DECLARE_STENCIL_OPERATOR2(name,A,B) \ template \ - inline BZ_PROMOTE(_bz_typename T1::T_numtype, \ - _bz_typename T2::T_numtype) name ## _stencilop(const T1& A, \ + inline BZ_PROMOTE(typename T1::T_numtype, \ + typename T2::T_numtype) name ## _stencilop(const T1& A, \ const T2& B) \ { #define BZ_DECLARE_STENCIL_OPERATOR3(name,A,B,C) \ template \ - inline BZ_PROMOTE(BZ_PROMOTE(_bz_typename T1::T_numtype, \ - _bz_typename T2::T_numtype), \ - _bz_typename T3::T_numtype) name ## _stencilop(const T1& A, \ + inline BZ_PROMOTE(BZ_PROMOTE(typename T1::T_numtype, \ + typename T2::T_numtype), \ + typename T3::T_numtype) name ## _stencilop(const T1& A, \ const T2& B, \ const T3& C) \ { @@ -123,11 +123,11 @@ BZ_END_STENCIL_OPERATOR #define BZ_DECLARE_DIFF(name) \ template \ - inline _bz_typename T::T_numtype name ## _stencilop(const T& A, int dim = firstDim) + inline typename T::T_numtype name ## _stencilop(const T& A, int dim = firstDim) #define BZ_DECLARE_MULTIDIFF(name) \ template \ - inline _bz_typename multicomponent_traits<_bz_typename \ + inline typename multicomponent_traits::T_element name ## _stencilop(const T& A, int comp, int dim) /**************************************************************************** @@ -575,60 +575,60 @@ BZ_DECLARE_MULTIDIFF(forward42n) { return forward42_stencilop(A,comp,dim); } ****************************************************************************/ template -inline TinyVector<_bz_typename T::T_numtype,2> grad2D_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector grad2D_stencilop(const T& A) { + return TinyVector( central12_stencilop(A,firstDim), central12_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,2> 
grad2D4_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector grad2D4_stencilop(const T& A) { + return TinyVector( central14_stencilop(A,firstDim), central14_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> grad3D_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector grad3D_stencilop(const T& A) { + return TinyVector( central12_stencilop(A,firstDim), central12_stencilop(A,secondDim), central12_stencilop(A,thirdDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> grad3D4_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector grad3D4_stencilop(const T& A) { + return TinyVector( central14_stencilop(A,firstDim), central14_stencilop(A,secondDim), central14_stencilop(A,thirdDim)); } template -inline TinyVector<_bz_typename T::T_numtype,2> grad2Dn_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector grad2Dn_stencilop(const T& A) { + return TinyVector( central12n_stencilop(A,firstDim), central12n_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,2> grad2D4n_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector grad2D4n_stencilop(const T& A) { + return TinyVector( central14n_stencilop(A,firstDim), central14n_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> grad3Dn_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector grad3Dn_stencilop(const T& A) { + return TinyVector( central12n_stencilop(A,firstDim), central12n_stencilop(A,secondDim), central12n_stencilop(A,thirdDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> grad3D4n_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector grad3D4n_stencilop(const T& A) { + return TinyVector( central14n_stencilop(A,firstDim), 
central14n_stencilop(A,secondDim), central14n_stencilop(A,thirdDim)); @@ -639,30 +639,30 @@ inline TinyVector<_bz_typename T::T_numtype,3> grad3D4n_stencilop(const T& A) { ****************************************************************************/ template -inline TinyVector<_bz_typename T::T_numtype,2> gradSqr2D_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector gradSqr2D_stencilop(const T& A) { + return TinyVector( central22_stencilop(A,firstDim), central22_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,2> gradSqr2D4_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector gradSqr2D4_stencilop(const T& A) { + return TinyVector( central24_stencilop(A,firstDim), central24_stencilop(A,secondDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3D_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector gradSqr3D_stencilop(const T& A) { + return TinyVector( central22_stencilop(A,firstDim), central22_stencilop(A,secondDim), central22_stencilop(A,thirdDim)); } template -inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3D4_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector gradSqr3D4_stencilop(const T& A) { + return TinyVector( central24_stencilop(A,firstDim), central24_stencilop(A,secondDim), central24_stencilop(A,thirdDim)); @@ -673,25 +673,25 @@ inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3D4_stencilop(const T& A) ****************************************************************************/ template -inline TinyVector<_bz_typename T::T_numtype,2> gradSqr2Dn_stencilop(const T& A) { +inline TinyVector gradSqr2Dn_stencilop(const T& A) { return gradSqr2D_stencilop(A); } template -inline TinyVector<_bz_typename T::T_numtype,2> gradSqr2D4n_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,2>( +inline TinyVector 
gradSqr2D4n_stencilop(const T& A) { + return TinyVector( central24_stencilop(A,firstDim) * recip_12, central24_stencilop(A,secondDim) * recip_12); } template -inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3Dn_stencilop(const T& A) { +inline TinyVector gradSqr3Dn_stencilop(const T& A) { return gradSqr3D_stencilop(A);(A); } template -inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3D4n_stencilop(const T& A) { - return TinyVector<_bz_typename T::T_numtype,3>( +inline TinyVector gradSqr3D4n_stencilop(const T& A) { + return TinyVector( central24_stencilop(A,firstDim) * recip_12, central24_stencilop(A,secondDim) * recip_12, central24_stencilop(A,thirdDim) * recip_12); @@ -702,14 +702,14 @@ inline TinyVector<_bz_typename T::T_numtype,3> gradSqr3D4n_stencilop(const T& A) ****************************************************************************/ template -inline TinyMatrix<_bz_typename multicomponent_traits<_bz_typename +inline TinyMatrix::T_element, 3, 3> Jacobian3D_stencilop(const T& A) { const int x=0, y=1, z=2; const int u=0, v=1, w=2; - TinyMatrix<_bz_typename multicomponent_traits<_bz_typename + TinyMatrix::T_element, 3, 3> grad; grad(u,x) = central12_stencilop(A,u,x); @@ -726,14 +726,14 @@ Jacobian3D_stencilop(const T& A) } template -inline TinyMatrix<_bz_typename multicomponent_traits<_bz_typename +inline TinyMatrix::T_element, 3, 3> Jacobian3Dn_stencilop(const T& A) { const int x=0, y=1, z=2; const int u=0, v=1, w=2; - TinyMatrix<_bz_typename multicomponent_traits<_bz_typename + TinyMatrix::T_element, 3, 3> grad; grad(u,x) = central12n_stencilop(A,u,x); @@ -750,14 +750,14 @@ Jacobian3Dn_stencilop(const T& A) } template -inline TinyMatrix<_bz_typename multicomponent_traits<_bz_typename +inline TinyMatrix::T_element, 3, 3> Jacobian3D4_stencilop(const T& A) { const int x=0, y=1, z=2; const int u=0, v=1, w=2; - TinyMatrix<_bz_typename multicomponent_traits<_bz_typename + TinyMatrix::T_element, 3, 3> grad; grad(u,x) = central14_stencilop(A,u,x); @@ 
-774,14 +774,14 @@ Jacobian3D4_stencilop(const T& A) } template -inline TinyMatrix<_bz_typename multicomponent_traits<_bz_typename +inline TinyMatrix::T_element, 3, 3> Jacobian3D4n_stencilop(const T& A) { const int x=0, y=1, z=2; const int u=0, v=1, w=2; - TinyMatrix<_bz_typename multicomponent_traits<_bz_typename + TinyMatrix::T_element, 3, 3> grad; grad(u,x) = central14n_stencilop(A,u,x); @@ -804,11 +804,11 @@ Jacobian3D4n_stencilop(const T& A) // O(h^2) curl, using central difference template -inline TinyVector<_bz_typename T::T_numtype,3> +inline TinyVector curl_stencilop(const T& vx, const T& vy, const T& vz) { const int x = firstDim, y = secondDim, z = thirdDim; - return TinyVector<_bz_typename T::T_numtype,3>( + return TinyVector( central12_stencilop(vz,y)-central12_stencilop(vy,z), central12_stencilop(vx,z)-central12_stencilop(vz,x), central12_stencilop(vy,x)-central12_stencilop(vx,y)); @@ -816,11 +816,11 @@ curl_stencilop(const T& vx, const T& vy, const T& vz) { // Normalized O(h^2) curl, using central difference template -inline TinyVector<_bz_typename T::T_numtype,3> +inline TinyVector curln_stencilop(const T& vx, const T& vy, const T& vz) { const int x = firstDim, y = secondDim, z = thirdDim; - return TinyVector<_bz_typename T::T_numtype,3>( + return TinyVector( (central12_stencilop(vz,y)-central12_stencilop(vy,z)) * recip_2, (central12_stencilop(vx,z)-central12_stencilop(vz,x)) * recip_2, (central12_stencilop(vy,x)-central12_stencilop(vx,y)) * recip_2); @@ -828,10 +828,10 @@ curln_stencilop(const T& vx, const T& vy, const T& vz) { // Multicomponent curl template -inline _bz_typename T::T_numtype curl3D_stencilop(const T& A) { +inline typename T::T_numtype curl3D_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; - return _bz_typename T::T_numtype( + return typename T::T_numtype( central12_stencilop(A,z,y)-central12_stencilop(A,y,z), central12_stencilop(A,x,z)-central12_stencilop(A,z,x), 
central12_stencilop(A,y,x)-central12_stencilop(A,x,y)); @@ -839,10 +839,10 @@ inline _bz_typename T::T_numtype curl3D_stencilop(const T& A) { // Normalized multicomponent curl template -inline _bz_typename T::T_numtype curl3Dn_stencilop(const T& A) { +inline typename T::T_numtype curl3Dn_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; - return _bz_typename T::T_numtype( + return typename T::T_numtype( (central12_stencilop(A,z,y)-central12_stencilop(A,y,z)) * recip_2, (central12_stencilop(A,x,z)-central12_stencilop(A,z,x)) * recip_2, (central12_stencilop(A,y,x)-central12_stencilop(A,x,y)) * recip_2); @@ -850,11 +850,11 @@ inline _bz_typename T::T_numtype curl3Dn_stencilop(const T& A) { // O(h^4) curl, using 4th order central difference template -inline TinyVector<_bz_typename T::T_numtype,3> +inline TinyVector curl4_stencilop(const T& vx, const T& vy, const T& vz) { const int x = firstDim, y = secondDim, z = thirdDim; - return TinyVector<_bz_typename T::T_numtype,3>( + return TinyVector( central14_stencilop(vz,y)-central14_stencilop(vy,z), central14_stencilop(vx,z)-central14_stencilop(vz,x), central14_stencilop(vy,x)-central14_stencilop(vx,y)); @@ -862,11 +862,11 @@ curl4_stencilop(const T& vx, const T& vy, const T& vz) { // Normalized O(h^4) curl, using 4th order central difference template -inline TinyVector<_bz_typename T::T_numtype,3> +inline TinyVector curl4n_stencilop(const T& vx, const T& vy, const T& vz) { const int x = firstDim, y = secondDim, z = thirdDim; - return TinyVector<_bz_typename T::T_numtype,3>( + return TinyVector( (central14_stencilop(vz,y)-central14_stencilop(vy,z)) * recip_2, (central14_stencilop(vx,z)-central14_stencilop(vz,x)) * recip_2, (central14_stencilop(vy,x)-central14_stencilop(vx,y)) * recip_2); @@ -874,11 +874,11 @@ curl4n_stencilop(const T& vx, const T& vy, const T& vz) { // O(h^4) curl, using 4th order central difference (multicomponent version) template -inline _bz_typename T::T_numtype +inline 
typename T::T_numtype curl3D4_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; - return _bz_typename T::T_numtype( + return typename T::T_numtype( central14_stencilop(A,z,y)-central14_stencilop(A,y,z), central14_stencilop(A,x,z)-central14_stencilop(A,z,x), central14_stencilop(A,y,x)-central14_stencilop(A,x,y)); @@ -886,11 +886,11 @@ curl3D4_stencilop(const T& A) { // O(h^4) curl, using 4th order central difference (normalized multicomponent) template -inline _bz_typename T::T_numtype +inline typename T::T_numtype curl3D4n_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; - return _bz_typename T::T_numtype( + return typename T::T_numtype( (central14_stencilop(A,z,y)-central14_stencilop(A,y,z)) * recip_2, (central14_stencilop(A,x,z)-central14_stencilop(A,z,x)) * recip_2, (central14_stencilop(A,y,x)-central14_stencilop(A,x,y)) * recip_2); @@ -901,7 +901,7 @@ curl3D4n_stencilop(const T& A) { // Two-dimensional curl template -inline _bz_typename T::T_numtype +inline typename T::T_numtype curl_stencilop(const T& vx, const T& vy) { const int x = firstDim, y = secondDim; @@ -909,7 +909,7 @@ curl_stencilop(const T& vx, const T& vy) { } template -inline _bz_typename T::T_numtype +inline typename T::T_numtype curln_stencilop(const T& vx, const T& vy) { const int x = firstDim, y = secondDim; @@ -918,13 +918,13 @@ curln_stencilop(const T& vx, const T& vy) { // Multicomponent curl template -inline _bz_typename T::T_numtype::T_numtype curl2D_stencilop(const T& A) { +inline typename T::T_numtype::T_numtype curl2D_stencilop(const T& A) { const int x = firstDim, y = secondDim; return central12_stencilop(A,y,x)-central12_stencilop(A,x,y); } template -inline _bz_typename T::T_numtype::T_numtype curl2Dn_stencilop(const T& A) { +inline typename T::T_numtype::T_numtype curl2Dn_stencilop(const T& A) { const int x = firstDim, y = secondDim; return (central12_stencilop(A,y,x)-central12_stencilop(A,x,y)) * recip_2; } @@ -933,7 +933,7 @@ 
inline _bz_typename T::T_numtype::T_numtype curl2Dn_stencilop(const T& A) { // 4th order versions template -inline _bz_typename T::T_numtype +inline typename T::T_numtype curl4_stencilop(const T& vx, const T& vy) { const int x = firstDim, y = secondDim; @@ -941,7 +941,7 @@ curl4_stencilop(const T& vx, const T& vy) { } template -inline _bz_typename T::T_numtype +inline typename T::T_numtype curl4n_stencilop(const T& vx, const T& vy) { const int x = firstDim, y = secondDim; @@ -950,13 +950,13 @@ curl4n_stencilop(const T& vx, const T& vy) { // Multicomponent curl template -inline _bz_typename T::T_numtype::T_numtype curl2D4_stencilop(const T& A) { +inline typename T::T_numtype::T_numtype curl2D4_stencilop(const T& A) { const int x = firstDim, y = secondDim; return central14_stencilop(A,y,x)-central14_stencilop(A,x,y); } template -inline _bz_typename T::T_numtype::T_numtype curl2D4n_stencilop(const T& A) { +inline typename T::T_numtype::T_numtype curl2D4n_stencilop(const T& A) { const int x = firstDim, y = secondDim; return (central14_stencilop(A,y,x)-central14_stencilop(A,x,y)) * recip_12; } @@ -1006,7 +1006,7 @@ BZ_END_STENCIL_OPERATOR // these return a scalar, which is T_numtype of T::T_result (which may be an ET) template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div2D_stencilop(const T& A) { const int x = firstDim, y = secondDim; @@ -1014,7 +1014,7 @@ div2D_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div2Dn_stencilop(const T& A) { const int x = firstDim, y = secondDim; @@ -1022,7 +1022,7 @@ div2Dn_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div2D4_stencilop(const T& A) { const int x = firstDim, y = secondDim; @@ -1030,7 +1030,7 @@ div2D4_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div2D4n_stencilop(const T& 
A) { const int x = firstDim, y = secondDim; @@ -1038,7 +1038,7 @@ div2D4n_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div3D_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; @@ -1046,7 +1046,7 @@ div3D_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div3Dn_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; @@ -1054,7 +1054,7 @@ div3Dn_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div3D4_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; @@ -1062,7 +1062,7 @@ div3D4_stencilop(const T& A) } template -inline _bz_typename T::T_result::T_numtype +inline typename T::T_result::T_numtype div3D4n_stencilop(const T& A) { const int x = firstDim, y = secondDim, z = thirdDim; @@ -1074,7 +1074,7 @@ div3D4n_stencilop(const T& A) ****************************************************************************/ template -inline _bz_typename T::T_numtype +inline typename T::T_numtype mixed22_stencilop(const T& A, int x, int y) { return A.shift(-1,x,-1,y) - A.shift(-1,x,1,y) @@ -1082,14 +1082,14 @@ mixed22_stencilop(const T& A, int x, int y) } template -inline _bz_typename T::T_numtype +inline typename T::T_numtype mixed22n_stencilop(const T& A, int x, int y) { return mixed22_stencilop(A,x,y) * recip_4; } template -inline _bz_typename T::T_numtype +inline typename T::T_numtype mixed24_stencilop(const T& A, int x, int y) { return 64.0 * (A.shift(-1,x,-1,y) - A.shift(-1,x,1,y) - @@ -1103,7 +1103,7 @@ mixed24_stencilop(const T& A, int x, int y) } template -inline _bz_typename T::T_numtype +inline typename T::T_numtype mixed24n_stencilop(const T& A, int x, int y) { return mixed24_stencilop(A,x,y) * recip_144; @@ -1123,7 +1123,7 @@ mixed24n_stencilop(const T& A, int x, int y) 
****************************************************************************/ template -inline _bz_typename multicomponent_traits<_bz_typename +inline typename multicomponent_traits::T_element div3DVec4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) { @@ -1134,18 +1134,18 @@ inline _bz_typename multicomponent_traits<_bz_typename } template -inline _bz_typename T::T_numtype Laplacian3D4_stencilop(const T& A, +inline typename T::T_numtype Laplacian3D4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) { return Laplacian3D4n_stencilop(A) * geom.recipSpatialStepPow2(); } template -inline _bz_typename T::T_numtype Laplacian3DVec4_stencilop(const T& A, +inline typename T::T_numtype Laplacian3DVec4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) { - typedef _bz_typename T::T_numtype vector3d; - typedef _bz_typename multicomponent_traits::T_element + typedef typename T::T_numtype vector3d; + typedef typename multicomponent_traits::T_element T_element; const int u = 0, v = 1, w = 2; const int x = 0, y = 1, z = 2; @@ -1166,14 +1166,14 @@ inline _bz_typename T::T_numtype Laplacian3DVec4_stencilop(const T& A, } template -inline TinyMatrix<_bz_typename multicomponent_traits<_bz_typename +inline TinyMatrix::T_element, 3, 3> grad3DVec4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) { const int x=0, y=1, z=2; const int u=0, v=1, w=2; - TinyMatrix<_bz_typename multicomponent_traits<_bz_typename + TinyMatrix::T_element, 3, 3> grad; // This is a 9*4 = 36 point stencil @@ -1191,9 +1191,9 @@ grad3DVec4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) } template -inline TinyVector<_bz_typename T::T_numtype,3> grad3D4_stencilop(const T& A, +inline TinyVector grad3D4_stencilop(const T& A, const UniformCubicGeometry<3>& geom) { - return TinyVector<_bz_typename T::T_numtype,3>( + return TinyVector( central14_stencilop(A,firstDim) * recip_12 * geom.recipSpatialStep(), central14_stencilop(A,secondDim) * recip_12 * geom.recipSpatialStep(), 
central14_stencilop(A,thirdDim) * recip_12 * geom.recipSpatialStep()); diff --git a/blitz/array/stencils.cc b/blitz/array/stencils.cc index e8fb767b..a30acd82 100644 --- a/blitz/array/stencils.cc +++ b/blitz/array/stencils.cc @@ -99,16 +99,16 @@ void calcStencilExtent(T_extent& At, const T_stencil& stencil, const T_array10&, const T_array11&) { // Interrogate the stencil to find out its extent - _bz_typename stencilExtent_traits::T_stencilExtent Bt; - _bz_typename stencilExtent_traits::T_stencilExtent Ct; - _bz_typename stencilExtent_traits::T_stencilExtent Dt; - _bz_typename stencilExtent_traits::T_stencilExtent Et; - _bz_typename stencilExtent_traits::T_stencilExtent Ft; - _bz_typename stencilExtent_traits::T_stencilExtent Gt; - _bz_typename stencilExtent_traits::T_stencilExtent Ht; - _bz_typename stencilExtent_traits::T_stencilExtent It; - _bz_typename stencilExtent_traits::T_stencilExtent Jt; - _bz_typename stencilExtent_traits::T_stencilExtent Kt; + typename stencilExtent_traits::T_stencilExtent Bt; + typename stencilExtent_traits::T_stencilExtent Ct; + typename stencilExtent_traits::T_stencilExtent Dt; + typename stencilExtent_traits::T_stencilExtent Et; + typename stencilExtent_traits::T_stencilExtent Ft; + typename stencilExtent_traits::T_stencilExtent Gt; + typename stencilExtent_traits::T_stencilExtent Ht; + typename stencilExtent_traits::T_stencilExtent It; + typename stencilExtent_traits::T_stencilExtent Jt; + typename stencilExtent_traits::T_stencilExtent Kt; stencil.apply(At, Bt, Ct, Dt, Et, Ft, Gt, Ht, It, Jt, Kt); At.combine(Bt); @@ -243,16 +243,16 @@ void applyStencil_imp(const T_stencil& stencil, Array& A, // Now do the actual loop FastArrayIterator Aiter(A); - _bz_typename T_array2::T_iterator Biter(B); - _bz_typename T_array3::T_iterator Citer(C); - _bz_typename T_array4::T_iterator Diter(D); - _bz_typename T_array5::T_iterator Eiter(E); - _bz_typename T_array6::T_iterator Fiter(F); - _bz_typename T_array7::T_iterator Giter(G); - _bz_typename 
T_array8::T_iterator Hiter(H); - _bz_typename T_array9::T_iterator Iiter(I); - _bz_typename T_array10::T_iterator Jiter(J); - _bz_typename T_array11::T_iterator Kiter(K); + typename T_array2::T_iterator Biter(B); + typename T_array3::T_iterator Citer(C); + typename T_array4::T_iterator Diter(D); + typename T_array5::T_iterator Eiter(E); + typename T_array6::T_iterator Fiter(F); + typename T_array7::T_iterator Giter(G); + typename T_array8::T_iterator Hiter(H); + typename T_array9::T_iterator Iiter(I); + typename T_array10::T_iterator Jiter(J); + typename T_array11::T_iterator Kiter(K); // Load the strides for the innermost loop Aiter.loadStride(2); @@ -346,16 +346,16 @@ void applyStencil_imp(const T_stencil& stencil, Array& A, // Now do the actual loop FastArrayIterator Aiter(A); - _bz_typename T_array2::T_iterator Biter(B); - _bz_typename T_array3::T_iterator Citer(C); - _bz_typename T_array4::T_iterator Diter(D); - _bz_typename T_array5::T_iterator Eiter(E); - _bz_typename T_array6::T_iterator Fiter(F); - _bz_typename T_array7::T_iterator Giter(G); - _bz_typename T_array8::T_iterator Hiter(H); - _bz_typename T_array9::T_iterator Iiter(I); - _bz_typename T_array10::T_iterator Jiter(J); - _bz_typename T_array11::T_iterator Kiter(K); + typename T_array2::T_iterator Biter(B); + typename T_array3::T_iterator Citer(C); + typename T_array4::T_iterator Diter(D); + typename T_array5::T_iterator Eiter(E); + typename T_array6::T_iterator Fiter(F); + typename T_array7::T_iterator Giter(G); + typename T_array8::T_iterator Hiter(H); + typename T_array9::T_iterator Iiter(I); + typename T_array10::T_iterator Jiter(J); + typename T_array11::T_iterator Kiter(K); // Load the strides for the innermost loop Aiter.loadStride(1); @@ -439,16 +439,16 @@ void applyStencil_imp(const T_stencil& stencil, Array& A, // Now do the actual loop FastArrayIterator Aiter(A); - _bz_typename T_array2::T_iterator Biter(B); - _bz_typename T_array3::T_iterator Citer(C); - _bz_typename 
T_array4::T_iterator Diter(D); - _bz_typename T_array5::T_iterator Eiter(E); - _bz_typename T_array6::T_iterator Fiter(F); - _bz_typename T_array7::T_iterator Giter(G); - _bz_typename T_array8::T_iterator Hiter(H); - _bz_typename T_array9::T_iterator Iiter(I); - _bz_typename T_array10::T_iterator Jiter(J); - _bz_typename T_array11::T_iterator Kiter(K); + typename T_array2::T_iterator Biter(B); + typename T_array3::T_iterator Citer(C); + typename T_array4::T_iterator Diter(D); + typename T_array5::T_iterator Eiter(E); + typename T_array6::T_iterator Fiter(F); + typename T_array7::T_iterator Giter(G); + typename T_array8::T_iterator Hiter(H); + typename T_array9::T_iterator Iiter(I); + typename T_array10::T_iterator Jiter(J); + typename T_array11::T_iterator Kiter(K); // Load the strides for the innermost loop Aiter.loadStride(0); diff --git a/blitz/array/stencils.h b/blitz/array/stencils.h index 30b37c07..fc6c6856 100644 --- a/blitz/array/stencils.h +++ b/blitz/array/stencils.h @@ -186,7 +186,7 @@ class dummy { template void operator=(T2) { } - _bz_typename multicomponent_traits::T_element operator[](int i) const + typename multicomponent_traits::T_element operator[](int i) const { return value_[i]; } void loadStride(int) { } @@ -254,10 +254,10 @@ class stencilExtent : public ETBase > { return dummy(1); } - dummy<_bz_typename multicomponent_traits::T_element> + dummy::T_element> operator[](int) const { - return dummy<_bz_typename multicomponent_traits::T_element> + return dummy::T_element> (1); } diff --git a/blitz/array/storage.h b/blitz/array/storage.h index bd200b51..514f0300 100644 --- a/blitz/array/storage.h +++ b/blitz/array/storage.h @@ -324,7 +324,7 @@ template class FortranArray : public GeneralArrayStorage { private: typedef GeneralArrayStorage T_base; - typedef _bz_typename T_base::noInitializeFlag noInitializeFlag; + typedef typename T_base::noInitializeFlag noInitializeFlag; using T_base::ordering_; using T_base::ascendingFlag_; using T_base::base_; @@ 
-400,7 +400,7 @@ template class ColumnMajorArray : public GeneralArrayStorage { private: typedef GeneralArrayStorage T_base; - typedef _bz_typename T_base::noInitializeFlag noInitializeFlag; + typedef typename T_base::noInitializeFlag noInitializeFlag; using T_base::ordering_; using T_base::ascendingFlag_; using T_base::base_; diff --git a/blitz/array/where.h b/blitz/array/where.h index e6bd398a..0fb8c79c 100644 --- a/blitz/array/where.h +++ b/blitz/array/where.h @@ -49,15 +49,15 @@ class _bz_ArrayWhere { typedef P_expr1 T_expr1; typedef P_expr2 T_expr2; typedef P_expr3 T_expr3; - typedef _bz_typename T_expr2::T_numtype T_numtype2; - typedef _bz_typename T_expr3::T_numtype T_numtype3; + typedef typename T_expr2::T_numtype T_numtype2; + typedef typename T_expr3::T_numtype T_numtype3; typedef BZ_PROMOTE(T_numtype2, T_numtype3) T_numtype; typedef T_expr1 T_ctorArg1; typedef T_expr2 T_ctorArg2; typedef T_expr3 T_ctorArg3; - typedef _bz_ArrayWhere<_bz_typename P_expr1::T_range_result, - _bz_typename P_expr2::T_range_result, - _bz_typename P_expr3::T_range_result> T_range_result; + typedef _bz_ArrayWhere T_range_result; // select return type typedef typename unwrapET::T_unwrapped T_unwrapped1; @@ -423,13 +423,13 @@ class _bz_ArrayWhere { template inline -_bz_ArrayExpr<_bz_ArrayWhere<_bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr, _bz_typename asExpr::T_expr> > +_bz_ArrayExpr<_bz_ArrayWhere::T_expr, + typename asExpr::T_expr, typename asExpr::T_expr> > where(const T1& a, const T2& b, const T3& c) { - return _bz_ArrayExpr<_bz_ArrayWhere<_bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr> >(a,b,c); + return _bz_ArrayExpr<_bz_ArrayWhere::T_expr, + typename asExpr::T_expr, + typename asExpr::T_expr> >(a,b,c); } } diff --git a/blitz/array/zip.h b/blitz/array/zip.h index 520bbff8..18c929c2 100644 --- a/blitz/array/zip.h +++ b/blitz/array/zip.h @@ -59,19 +59,19 @@ struct Zip2 { template inline 
_bz_ArrayExpr<_bz_ArrayExprBinaryOp< - _bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr, + typename asExpr::T_expr, + typename asExpr::T_expr, Zip2::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype> > > + typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype> > > zip(const T1& a, const T2& b, T_component) { return _bz_ArrayExpr<_bz_ArrayExprBinaryOp< - _bz_typename asExpr::T_expr, - _bz_typename asExpr::T_expr, + typename asExpr::T_expr, + typename asExpr::T_expr, Zip2::T_expr::T_numtype, - _bz_typename asExpr::T_expr::T_numtype> > >(a,b); + typename asExpr::T_expr::T_numtype, + typename asExpr::T_expr::T_numtype> > >(a,b); } } diff --git a/blitz/blitz.h b/blitz/blitz.h index a364c9d3..66e5f37c 100644 --- a/blitz/blitz.h +++ b/blitz/blitz.h @@ -49,11 +49,15 @@ #endif #endif -#include +#include #include // Compiler-specific directives #include // Performance tuning #include // Profiling +#ifndef BLITZ_ARRAY_LARGEST_RANK +#define BLITZ_ARRAY_LARGEST_RANK 11 +#endif // BLITZ_ARRAY_LARGEST_RANK + #ifdef BZ_HAVE_STL #include #endif diff --git a/blitz/bzconfig.h b/blitz/bzconfig.h deleted file mode 100644 index 17d30b84..00000000 --- a/blitz/bzconfig.h +++ /dev/null @@ -1,106 +0,0 @@ -// -*- C++ -*- -/*************************************************************************** - * blitz/bzconfig.h Select compiler-specific config file - * - * $Id$ - * - * Copyright (C) 1997-2011 Todd Veldhuizen - * - * This file is a part of Blitz. - * - * Blitz is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Blitz is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Blitz. If not, see . - * - * Suggestions: blitz-devel@lists.sourceforge.net - * Bugs: blitz-support@lists.sourceforge.net - * - * For more information, please see the Blitz++ Home Page: - * https://sourceforge.net/projects/blitz/ - * - ***************************************************************************/ - -/* Select the compiler-specific config.h header file */ -#ifndef BZCONFIG_H -#define BZCONFIG_H - -#if defined(__APPLE__) -#if defined(__clang__) -/* clang compiler */ -#include -#elif defined(__GNUC__) -/* GNU gcc compiler for newer Mac OS X Darwin */ -#include -#else -/* IBM xlc compiler for Darwin */ -#include -#endif - -#elif defined(__INTEL_COMPILER) -/* Intel icc compiler */ -#include - -#elif defined(_MSC_VER) -/* Microsoft VS.NET compiler */ -#include - -#elif defined(__xlC__) -/* IBM xlC compiler */ -#include - -#elif defined(__DECCXX) -/* Compaq cxx compiler */ -#include - -#elif defined(__HP_aCC) -/* HP aCC compiler */ -#include - -#elif defined(_SGI_COMPILER_VERSION) -/* SGI CC compiler */ -#include - -#elif defined(__SUNPRO_CC) -/* SunPRO CC compiler */ -#include - -#elif defined(__PATHCC__) -/* Pathscale pathCC compiler */ -#include - -#elif defined(__clang__) -/* clang compiler */ -#include - -#elif defined(__GNUC__) -/* GNU gcc compiler */ -#include - -#elif defined(__PGI) -/* PGI pgCC compiler */ -#include - -#elif defined(__KCC) -/* KAI KCC compiler */ -#include - -#elif defined(__FUJITSU) -/* Fujitsu FCC compiler */ -#include - -/* Add other compilers here */ - -#else -#error Unknown compiler -#endif - -#endif /* BZCONFIG_H */ diff --git a/blitz/compiler.h b/blitz/compiler.h index 44efbf7f..a365e78c 100644 --- a/blitz/compiler.h +++ b/blitz/compiler.h @@ -31,35 +31,12 @@ #ifndef BZ_COMPILER_H #define BZ_COMPILER_H -// The file is used to select a compiler-specific -// config.h 
file that is generated automatically by configure. - -#include +#include /* * Define some kludges. */ -#ifndef BZ_HAVE_TEMPLATES - #error In : A working template implementation is required by Blitz++ (you may need to rerun the compiler/bzconfig script) -#endif - -#ifndef BZ_HAVE_MEMBER_TEMPLATES - #error In : Your compiler does not support member templates. (you may need to rerun the compiler/bzconfig script) -#endif - -#ifndef BZ_HAVE_FULL_SPECIALIZATION_SYNTAX - #error In : Your compiler does not support template<> full specialization syntax. You may need to rerun the compiler/bzconfig script. -#endif - -#ifndef BZ_HAVE_PARTIAL_ORDERING - #error In : Your compiler does not support partial ordering (you may need to rerun the compiler/bzconfig script) -#endif - -#ifndef BZ_HAVE_PARTIAL_SPECIALIZATION - #error In : Your compiler does not support partial specialization (you may need to rerun the compiler/bzconfig script) -#endif - #ifdef BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE #define BZ_USE_NUMTRAIT #endif @@ -70,20 +47,6 @@ #define BZ_TEMPLATE_DEFAULT(X) #endif -#ifndef BZ_HAVE_EXPLICIT - #define explicit -#endif - -#ifdef BZ_HAVE_TYPENAME - #define _bz_typename typename -#else - #define _bz_typename -#endif - -#ifndef BZ_HAVE_MUTABLE - #define mutable -#endif - #ifdef BZ_DISABLE_RESTRICT #undef BZ_HAVE_NCEG_RESTRICT #endif @@ -96,12 +59,6 @@ #endif #endif -#if !defined(BZ_HAVE_BOOL) && !defined(BZ_NO_BOOL_KLUDGE) - #define bool int - #define true 1 - #define false 0 -#endif - #ifdef BZ_HAVE_ENUM_COMPUTATIONS_WITH_CAST #define BZ_ENUM_CAST(X) (int)X #elif defined(BZ_HAVE_ENUM_COMPUTATIONS) diff --git a/blitz/config.cmake.h.in b/blitz/config.h.in similarity index 64% rename from blitz/config.cmake.h.in rename to blitz/config.h.in index 3f1a255f..54dd45e7 100644 --- a/blitz/config.cmake.h.in +++ b/blitz/config.h.in @@ -4,14 +4,16 @@ // This file (if named config.h) was generated automatically when running cmake // from the file config.cmake.h.in. 
+extern const char blitz_GIT_SHA1[]; + // Macro for declaring aligned variables. -#cmakedefine BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE +/* #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE */ #ifdef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE #define BZ_ALIGN_VARIABLE(vartype,varname,alignment) __declspec(align(alignment)) vartype varname; #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE #endif -#cmakedefine BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE +/* #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE */ #ifdef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE #define BZ_ALIGN_VARIABLE(vartype,varname,alignment) vartype __attribute__ ((aligned (alignment))) varname; #undef BZ_HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE @@ -22,259 +24,259 @@ #endif // Enable dimensions with > 2^31 elements (NOT IMPLEMENTED). -#cmakedefine BZ_FULLY64BIT +/* #undef BZ_FULLY64BIT */ // define if bool is a built-in type -#cmakedefine BZ_HAVE_BOOL +#define BZ_HAVE_BOOL // define if the Boost library is available #cmakedefine BZ_HAVE_BOOST // Define to 1 if you have the header file. 
-#cmakedefine BZ_HAVE_BOOST_MPI +/* #undef BZ_HAVE_BOOST_MPI */ // define if the Boost::Serialization library is available #cmakedefine BZ_HAVE_BOOST_SERIALIZATION // define if the compiler has header -#cmakedefine BZ_HAVE_CLIMITS +#define BZ_HAVE_CLIMITS // define if the compiler has complex -#cmakedefine BZ_HAVE_COMPLEX +#define BZ_HAVE_COMPLEX // define if the compiler has standard complex functions -#cmakedefine BZ_HAVE_COMPLEX_FCNS +#define BZ_HAVE_COMPLEX_FCNS // define if the compiler has complex math functions -#cmakedefine BZ_HAVE_COMPLEX_MATH1 +#define BZ_HAVE_COMPLEX_MATH1 // define if the compiler has more complex math functions -#cmakedefine BZ_HAVE_COMPLEX_MATH2 +/* #undef BZ_HAVE_COMPLEX_MATH2 */ // define if complex math functions are in namespace std -#cmakedefine BZ_HAVE_COMPLEX_MATH_IN_NAMESPACE_STD +#define BZ_HAVE_COMPLEX_MATH_IN_NAMESPACE_STD // define if the compiler supports const_cast<> -#cmakedefine BZ_HAVE_CONST_CAST +#define BZ_HAVE_CONST_CAST // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_CSTRING +#define BZ_HAVE_CSTRING // define if the compiler supports default template parameters -#cmakedefine BZ_HAVE_DEFAULT_TEMPLATE_PARAMETERS +#define BZ_HAVE_DEFAULT_TEMPLATE_PARAMETERS // Obsolete ? // Define to 1 if you have the header file. 
-//#cmakedefine BZ_HAVE_DLFCN_H +/* #undef BZ_HAVE_DLFCN_H */ // define if the compiler supports dynamic_cast<> -#cmakedefine BZ_HAVE_DYNAMIC_CAST +#define BZ_HAVE_DYNAMIC_CAST // define if the compiler handle computations inside an enum -#cmakedefine BZ_HAVE_ENUM_COMPUTATIONS +#define BZ_HAVE_ENUM_COMPUTATIONS // define if the compiler handles (int) casts in enum computations -#cmakedefine BZ_HAVE_ENUM_COMPUTATIONS_WITH_CAST +#define BZ_HAVE_ENUM_COMPUTATIONS_WITH_CAST // define if the compiler supports exceptions -#cmakedefine BZ_HAVE_EXCEPTIONS +#define BZ_HAVE_EXCEPTIONS // define if the compiler supports the explicit keyword -#cmakedefine BZ_HAVE_EXPLICIT +#define BZ_HAVE_EXPLICIT // define if the compiler supports explicit template function qualification -#cmakedefine BZ_HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION +#define BZ_HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION // define if the compiler recognizes the full specialization syntax -#cmakedefine BZ_HAVE_FULL_SPECIALIZATION_SYNTAX +#define BZ_HAVE_FULL_SPECIALIZATION_SYNTAX // define if the compiler supports function templates with non-type parameters -#cmakedefine BZ_HAVE_FUNCTION_NONTYPE_PARAMETERS +#define BZ_HAVE_FUNCTION_NONTYPE_PARAMETERS // define if the compiler supports IEEE math library -#cmakedefine BZ_HAVE_IEEE_MATH +#define BZ_HAVE_IEEE_MATH // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_INTTYPES_H +#define BZ_HAVE_INTTYPES_H // Obsolete ? // Define to 1 if you have the `m' library (-lm). -#cmakedefine BZ_HAVE_LIBM +#define BZ_HAVE_LIBM // Define to 1 if you have the `papi' library (-lpapi). 
-#cmakedefine BZ_HAVE_LIBPAPI +/* #undef BZ_HAVE_LIBPAPI */ // define if the compiler supports member constants -#cmakedefine BZ_HAVE_MEMBER_CONSTANTS +#define BZ_HAVE_MEMBER_CONSTANTS // define if the compiler supports member templates -#cmakedefine BZ_HAVE_MEMBER_TEMPLATES +#define BZ_HAVE_MEMBER_TEMPLATES // define if the compiler supports member templates outside the class // declaration -#cmakedefine BZ_HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS +#define BZ_HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS // define if the compiler supports the mutable keyword -#cmakedefine BZ_HAVE_MUTABLE +#define BZ_HAVE_MUTABLE // define if the compiler supports the Numerical C Extensions Group restrict // keyword -#cmakedefine BZ_HAVE_NCEG_RESTRICT +/* #undef BZ_HAVE_NCEG_RESTRICT */ // define if the compiler supports the __restrict__ keyword -#cmakedefine BZ_HAVE_NCEG_RESTRICT_EGCS +#define BZ_HAVE_NCEG_RESTRICT_EGCS // define if the compiler has numeric_limits -#cmakedefine BZ_HAVE_NUMERIC_LIMITS +#define BZ_HAVE_NUMERIC_LIMITS // define if the compiler accepts the old for scoping rules -#cmakedefine BZ_HAVE_OLD_FOR_SCOPING +/* #undef BZ_HAVE_OLD_FOR_SCOPING */ // define if the compiler supports partial ordering -#cmakedefine BZ_HAVE_PARTIAL_ORDERING +#define BZ_HAVE_PARTIAL_ORDERING // define if the compiler supports partial specialization -#cmakedefine BZ_HAVE_PARTIAL_SPECIALIZATION +#define BZ_HAVE_PARTIAL_SPECIALIZATION // define if the compiler supports reinterpret_cast<> -#cmakedefine BZ_HAVE_REINTERPRET_CAST +#define BZ_HAVE_REINTERPRET_CAST // define if the compiler supports Run-Time Type Identification -#cmakedefine BZ_HAVE_RTTI +#define BZ_HAVE_RTTI // define if the compiler has getrusage() function -#cmakedefine BZ_HAVE_RUSAGE +#define BZ_HAVE_RUSAGE // define if the compiler supports static_cast<> -#cmakedefine BZ_HAVE_STATIC_CAST +#define BZ_HAVE_STATIC_CAST // define if the compiler supports ISO C++ standard library -#cmakedefine BZ_HAVE_STD +#define BZ_HAVE_STD // Obsolete ? 
// Define to 1 if you have the header file. -//#cmakedefine BZ_HAVE_STDINT_H +/* #undef BZ_HAVE_STDINT_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STDLIB_H +/* #undef BZ_HAVE_STDLIB_H */ // define if the compiler supports Standard Template Library -#cmakedefine BZ_HAVE_STL +#define BZ_HAVE_STL // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STRINGS_H +/* #undef BZ_HAVE_STRINGS_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_STRING_H +/* #undef BZ_HAVE_STRING_H */ // define if the compiler supports System V math library -#cmakedefine BZ_HAVE_SYSTEM_V_MATH +/* #undef BZ_HAVE_SYSTEM_V_MATH */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_SYS_STAT_H +/* #undef BZ_HAVE_SYS_STAT_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_SYS_TYPES_H +/* #undef BZ_HAVE_SYS_TYPES_H */ // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_TBB_ATOMIC_H +/* #undef BZ_HAVE_TBB_ATOMIC_H */ // define if the compiler supports basic templates -#cmakedefine BZ_HAVE_TEMPLATES +#define BZ_HAVE_TEMPLATES // define if the compiler supports templates as template arguments -#cmakedefine BZ_HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS +#define BZ_HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS // define if the compiler supports use of the template keyword as a qualifier -#cmakedefine BZ_HAVE_TEMPLATE_KEYWORD_QUALIFIER +#define BZ_HAVE_TEMPLATE_KEYWORD_QUALIFIER // define if the compiler supports template-qualified base class specifiers -#cmakedefine BZ_HAVE_TEMPLATE_QUALIFIED_BASE_CLASS +#define BZ_HAVE_TEMPLATE_QUALIFIED_BASE_CLASS // define if the compiler supports template-qualified return types -#cmakedefine BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE +#define BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE // define if the compiler supports function matching with argument types which // are template scope-qualified -#cmakedefine BZ_HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING +#define 
BZ_HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING // define if the compiler recognizes typename -#cmakedefine BZ_HAVE_TYPENAME +#define BZ_HAVE_TYPENAME // define if the compiler supports the vector type promotion mechanism -#cmakedefine BZ_HAVE_TYPE_PROMOTION +#define BZ_HAVE_TYPE_PROMOTION // Define to 1 if you have the header file. -#cmakedefine BZ_HAVE_UNISTD_H +/* #undef BZ_HAVE_UNISTD_H */ // define if the compiler supports numeric traits promotions -#cmakedefine BZ_HAVE_USE_NUMTRAIT +#define BZ_HAVE_USE_NUMTRAIT // define if the compiler has valarray -#cmakedefine BZ_HAVE_VALARRAY +#define BZ_HAVE_VALARRAY // define if the compiler has isnan function in namespace std -#cmakedefine BZ_ISNAN_IN_NAMESPACE_STD +#define BZ_ISNAN_IN_NAMESPACE_STD // define if the compiler has C math abs(integer types) in namespace std -#cmakedefine BZ_MATH_ABSINT_IN_NAMESPACE_STD +#define BZ_MATH_ABSINT_IN_NAMESPACE_STD // define if the compiler has C math functions in namespace std -#cmakedefine BZ_MATH_FN_IN_NAMESPACE_STD +#define BZ_MATH_FN_IN_NAMESPACE_STD // Name of package -#cmakedefine BZ_PACKAGE @BZ_PACKAGE@ +/* #undef BZ_PACKAGE */ // Define to the address where bug reports for this package should be sent. -#cmakedefine BZ_PACKAGE_BUGREPORT @BZ_PACKAGE_BUGREPORT@ +/* #undef BZ_PACKAGE_BUGREPORT */ // Define to the full name of this package. -#cmakedefine BZ_PACKAGE_NAME @BZ_PACKAGE_NAME@ +/* #undef BZ_PACKAGE_NAME */ // Define to the full name and version of this package. -#cmakedefine BZ_PACKAGE_STRING "@BZ_PACKAGE_STRING@" +#define BZ_PACKAGE_STRING " 1.0" // Define to the one symbol short name of this package. -#cmakedefine BZ_PACKAGE_TARNAME @BZ_PACKAGE_TARNAME@ +/* #undef BZ_PACKAGE_TARNAME */ // Define to the home page for this package. -#cmakedefine BZ_PACKAGE_URL @BZ_PACKAGE_URL@ +/* #undef BZ_PACKAGE_URL */ // Define to the version of this package. -#cmakedefine BZ_PACKAGE_VERSION @BZ_PACKAGE_VERSION@ +/* #undef BZ_PACKAGE_VERSION */ // Pad array lengths to SIMD width. 
-#cmakedefine BZ_PAD_ARRAYS +/* #undef BZ_PAD_ARRAYS */ // Set SIMD instruction width in bytes. -#cmakedefine BZ_SIMD_WIDTH @BZ_SIMD_WIDTH@ +#define BZ_SIMD_WIDTH 1 // Define to 1 if you have the ANSI C header files. -#cmakedefine BZ_STDC_HEADERS +/* #undef BZ_STDC_HEADERS */ // Enable Blitz thread-safety features -#cmakedefine BZ_THREADSAFE +/* #undef BZ_THREADSAFE */ // Use TBB atomic types. -#cmakedefine BZ_THREADSAFE_USE_TBB +/* #undef BZ_THREADSAFE_USE_TBB */ // Specifies whether compiler alignment pragmas should be used. -#cmakedefine BZ_USE_ALIGNMENT_PRAGMAS +/* #undef BZ_USE_ALIGNMENT_PRAGMAS */ // Version number of package -#cmakedefine BZ_VERSION +/* #undef BZ_VERSION */ // CXX -#cmakedefine BZ__compiler_name +/* #undef BZ__compiler_name */ // CXXFLAGS -#cmakedefine BZ__compiler_options +/* #undef BZ__compiler_options */ // date -#cmakedefine BZ__config_date +/* #undef BZ__config_date */ // uname -a -#cmakedefine BZ__os_name +/* #undef BZ__os_name */ // target -#cmakedefine BZ__platform +/* #undef BZ__platform */ diff --git a/blitz/et-forward.h b/blitz/et-forward.h index 020a83c8..a1020390 100644 --- a/blitz/et-forward.h +++ b/blitz/et-forward.h @@ -43,7 +43,7 @@ template class _bz_ArrayExprReduce; -template class _bz_StencilExpr; +template class _bz_StencilExpr; template class IndexPlaceholder; diff --git a/blitz/generate/CMakeLists.txt b/blitz/generate/CMakeLists.txt index c8b37360..ded1accd 100644 --- a/blitz/generate/CMakeLists.txt +++ b/blitz/generate/CMakeLists.txt @@ -1,10 +1,47 @@ -add_executable(genarrbops genarrbops.cpp) -add_executable(genarruops genarruops.cpp) -add_executable(genmatbops genmatbops.cpp) -add_executable(genmatuops genmatuops.cpp) -add_executable(genvecbops genvecbops.cpp) -add_executable(genvecuops genvecuops.cpp) -add_executable(genvecwhere genvecwhere.cpp) -add_executable(genvecbfn genvecbfn.cpp) -add_executable(genmathfunc genmathfunc.cpp) -add_executable(genpromote genpromote.cpp) +foreach(X b u) + 
add_executable("genarr${X}ops" "genarr${X}ops.cpp") + add_custom_command(TARGET "genarr${X}ops" + POST_BUILD + COMMAND $ + BYPRODUCTS "../array/${X}ops.cc" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ + COMMENT "built genarr${X}ops, now generating ../array/${X}ops.cc") +endforeach() + +#add_executable(genarrbops genarrbops.cpp) + +#add_custom_command(TARGET genarrbops + #POST_BUILD + #COMMAND $ + #BYPRODUCTS ../array/bops.cc + #WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ + #COMMENT "built genarrbops, now generating ../array/bops.cc") + +#add_executable(genarruops genarruops.cpp) + +macro(GENERATE_GENERATED_HEADER prog header) + add_executable(${prog} "${prog}.cpp") + add_custom_command(TARGET ${prog} + POST_BUILD + COMMAND $ + BYPRODUCTS ../${header} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ + COMMENT "built ${prog}, now generating ${header}") +endmacro() + +GENERATE_GENERATED_HEADER(genmatbops matbops.h) +GENERATE_GENERATED_HEADER(genmatuops matuops.h) +GENERATE_GENERATED_HEADER(genvecbops vecbops.cc) +GENERATE_GENERATED_HEADER(genvecuops vecuops.cc) +GENERATE_GENERATED_HEADER(genvecwhere vecwhere.cc) +GENERATE_GENERATED_HEADER(genvecbfn vecbfn.cc) +GENERATE_GENERATED_HEADER(genpromote promote-old.h) + +find_package(Python 3.8 REQUIRED COMPONENTS Interpreter) + +#add_custom_target(genStencilClasses ALL + #DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/genstencils.py + #COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/genstencils.py stencil-classes.cc + #BYPRODUCTS stencil-classes.cc + #COMMENT "Generating stencil-classes.cc by genstencils.py") + diff --git a/blitz/generate/genstencils.py b/blitz/generate/genstencils.py index f2eba28a..f3199567 100644 --- a/blitz/generate/genstencils.py +++ b/blitz/generate/genstencils.py @@ -16,12 +16,12 @@ def BZ_ET_STENCIL_REDIRECT(name): /* Explicit operators for arrays for stencil name. 
*/ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(const Array& d1) { return #name#(d1.wrap()); } template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(Array& d1) { return #name#(d1.wrap()); } """ @@ -41,13 +41,13 @@ def BZ_ET_STENCIL(name, result, etresult, MINB, MAXB): differently. The stencil ET calls the stencil operator name_stencilop, defined in stencilops.h. **/ - template + template class #name#_et : public _bz_StencilExpr { public: typedef _bz_StencilExpr T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // if P_numtype is an ET-type, we need to return an expr typedef typename selectET T_numtype> Type; }; - typedef #name#_et<_bz_typename P_expr::T_range_result, T_numtype> T_range_result; + typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -75,7 +75,7 @@ class #name#_et : public _bz_StencilExpr _bz_StencilExpr(a) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a) : + #name#_et(typename T_expr::T_ctorArg1 a) : _bz_StencilExpr(a) { } @@ -200,14 +200,14 @@ def BZ_ET_STENCIL2(name, result, etresult, MINB, MAXB): of the expression, set result="P_numtype" and etresult="typename T1::T_numtype". Sorry for that ugliness, but they define types differently. 
*/ -template +template class #name#_et2 : public _bz_StencilExpr2 { public: typedef _bz_StencilExpr2 T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr1 T_expr1; - typedef _bz_typename T_base::T_expr2 T_expr2; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr1 T_expr1; + typedef typename T_base::T_expr2 T_expr2; // if P_numtype is an ET-type, we need to return an expr typedef typename selectET T_numtype> Type; }; - typedef #name#_et2<_bz_typename P_expr1::T_range_result, _bz_typename P_expr2::T_range_result, T_numtype> T_range_result; + typedef #name#_et2 T_range_result; using T_base::iter1_; using T_base::iter2_; @@ -241,7 +241,7 @@ class #name#_et2 : public _bz_StencilExpr2 T_result operator*() const { return #name#_stencilop(iter1_, iter2_); } - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const + T_result operator()(typename _bz_IndexParameter >::type i) const { iter1_.moveTo(i); iter2_.moveTo(i); return #name#_stencilop(iter1_, iter2_); } @@ -393,12 +393,12 @@ def BZ_ET_STENCILM(name, result_rank, MINB, MAXB): N_rank>. P_numtype can be a TinyVector or a scalar, I think. */ template -class #name#_et : public _bz_StencilExpr::T_element, result_rank, result_rank> > +class #name#_et : public _bz_StencilExpr::T_element, result_rank, result_rank> > { public: - typedef _bz_StencilExpr::T_element, result_rank, result_rank> > T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef _bz_StencilExpr::T_element, result_rank, result_rank> > T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // there is no return type selection, as we are returning a // TinyMatrix. 
This must be returned as a FastTMCopyIterator since the @@ -412,7 +412,7 @@ class #name#_et : public _bz_StencilExpr::Type> Type; }; - typedef #name#_et<_bz_typename P_expr::T_range_result> T_range_result; + typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -425,14 +425,14 @@ class #name#_et : public _bz_StencilExpr(a) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a) : + #name#_et(typename T_expr::T_ctorArg1 a) : _bz_StencilExpr(a) { } T_result operator*() const { return #name#_stencilop(iter_); } - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const + T_result operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_); } T_range_result operator()(const RectDomain& d) const @@ -512,10 +512,10 @@ class SliceInfo { }; /* create ET from application to expression */ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > #name#(const blitz::ETBase& d1) { - return _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > + return _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); } """ @@ -535,8 +535,8 @@ class #name#_et : public _bz_StencilExpr > T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // there is no return type selection, we assume P_numtype is scalar // and that we are returning a TinyVector. 
This needs to be returned @@ -551,7 +551,7 @@ class #name#_et : public _bz_StencilExpr::Type> Type; }; - typedef #name#_et<_bz_typename P_expr::T_range_result> T_range_result; + typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -564,14 +564,14 @@ class #name#_et : public _bz_StencilExpr(a) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a) : + #name#_et(typename T_expr::T_ctorArg1 a) : _bz_StencilExpr(a) { } T_result operator*() const { return #name#_stencilop(iter_); } - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const + T_result operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_); } T_range_result operator()(const RectDomain& d) const @@ -651,10 +651,10 @@ class SliceInfo { }; /* create ET from application to expression */ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > #name#(const blitz::ETBase& d1) { - return _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > + return _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); } """ @@ -671,13 +671,13 @@ def BZ_ET_STENCIL_SCA(name, MINB, MAXB): array. */ template -class #name#_et : public _bz_StencilExpr::T_element> +class #name#_et : public _bz_StencilExpr::T_element> { public: - typedef _bz_typename multicomponent_traits::T_element T_result; + typedef typename multicomponent_traits::T_element T_result; typedef _bz_StencilExpr T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // there is no selecting return type here. 
because we *know* it is // scalar T_result, there's no question of whether we could be doing @@ -690,7 +690,7 @@ class #name#_et : public _bz_StencilExpr::Type> Type; }; -typedef #name#_et<_bz_typename P_expr::T_range_result> T_range_result; +typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -703,13 +703,13 @@ class #name#_et : public _bz_StencilExpr(a) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a) : + #name#_et(typename T_expr::T_ctorArg1 a) : _bz_StencilExpr(a) { } T_numtype operator*() const { return #name#_stencilop(iter_); } - T_numtype operator()(_bz_typename _bz_IndexParameter >::type i) const + T_numtype operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_); } T_range_result operator()(const RectDomain& d) const @@ -789,10 +789,10 @@ class SliceInfo { }; /* create ET from application to expression */ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > #name#(const blitz::ETBase& d1) { - return _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr::T_expr::T_range_result> > + return _bz_ArrayExpr<#name#_et::T_expr::T_range_result> > (blitz::asExpr::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); } """ @@ -813,12 +813,12 @@ def BZ_ET_STENCIL_DIFF(name, MINB, MAXB): dimension. 
*/ template -class #name#_et : public _bz_StencilExpr +class #name#_et : public _bz_StencilExpr { public: - typedef _bz_StencilExpr T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef _bz_StencilExpr T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // select return type typedef typename unwrapET::T_unwrapped test; @@ -833,7 +833,7 @@ class #name#_et : public _bz_StencilExpr typename T_expr::template tvresult::Type> Type; }; -typedef #name#_et<_bz_typename P_expr::T_range_result> T_range_result; +typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -846,13 +846,13 @@ class #name#_et : public _bz_StencilExpr _bz_StencilExpr(a), dim_(dim) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a, int dim) : + #name#_et(typename T_expr::T_ctorArg1 a, int dim) : _bz_StencilExpr(a), dim_(dim) { } T_result operator*() const { return #name#_stencilop(iter_, dim_); } - T_result operator()(_bz_typename _bz_IndexParameter >::type i) const + T_result operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_, dim_); } T_range_result operator()(const RectDomain& d) const @@ -945,11 +945,11 @@ class SliceInfo { } /* forward operations on arrays to main function */ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(const Array& d1, int dim) { return #name#(d1.wrap(), dim); } template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(Array& d1, int dim) { return #name#(d1.wrap(), dim); } """ @@ -964,13 +964,13 @@ def BZ_ET_STENCIL_MULTIDIFF(name, MINB, MAXB): multicomponent array and returns an array. 
*/ template -class #name#_et_multi : public _bz_StencilExpr::T_element> +class #name#_et_multi : public _bz_StencilExpr::T_element> { public: - typedef _bz_typename multicomponent_traits::T_element T_result; + typedef typename multicomponent_traits::T_element T_result; typedef _bz_StencilExpr T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // there is no selecting return type here. because we *know* it is // T_result, there's no question of whether we could be doing @@ -983,7 +983,7 @@ class #name#_et_multi : public _bz_StencilExpr::Type> Type; }; -typedef #name#_et_multi<_bz_typename P_expr::T_range_result> T_range_result; +typedef #name#_et_multi T_range_result; using T_base::iter_; using T_base::rank_; @@ -997,14 +997,14 @@ class #name#_et_multi : public _bz_StencilExpr(a), comp_(comp), dim_(dim) { } T_numtype operator*() const { return #name#_stencilop(iter_, comp_, dim_); } - T_numtype operator()(_bz_typename _bz_IndexParameter >::type i) const + T_numtype operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_, comp_, dim_); } T_range_result operator()(const RectDomain& d) const @@ -1098,12 +1098,12 @@ class SliceInfo { } /* forward operations on arrays to main function */ template -inline _bz_ArrayExpr<#name#_et_multi<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et_multi >::T_expr::T_range_result> > #name#(const Array& d1, int comp, int dim) { return #name#(d1.wrap(), comp, dim); } template -inline _bz_ArrayExpr<#name#_et_multi<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et_multi >::T_expr::T_range_result> > #name#(Array& d1, int comp, int dim) { return #name#(d1.wrap(), comp, dim); } """ @@ -1122,12 +1122,12 @@ def BZ_ET_STENCIL_DIFF2(name, MINB1, MAXB1, MINB2, MAXB2): are the 
dimensions to do the differences in). */ template -class #name#_et : public _bz_StencilExpr +class #name#_et : public _bz_StencilExpr { public: - typedef _bz_StencilExpr T_base; - typedef _bz_typename T_base::T_numtype T_numtype; - typedef _bz_typename T_base::T_expr T_expr; + typedef _bz_StencilExpr T_base; + typedef typename T_base::T_numtype T_numtype; + typedef typename T_base::T_expr T_expr; // select return type typedef typename unwrapET::T_unwrapped test; @@ -1142,7 +1142,7 @@ class #name#_et : public _bz_StencilExpr typename T_expr::template tvresult::Type> Type; }; -typedef #name#_et<_bz_typename P_expr::T_range_result> T_range_result; +typedef #name#_et T_range_result; using T_base::iter_; using T_base::rank_; @@ -1157,7 +1157,7 @@ class #name#_et : public _bz_StencilExpr dim1_(dim1), dim2_(dim2) { } - #name#_et(_bz_typename T_expr::T_ctorArg1 a, + #name#_et(typename T_expr::T_ctorArg1 a, int dim1, int dim2) : _bz_StencilExpr(a), dim1_(dim1), dim2_(dim2) @@ -1165,7 +1165,7 @@ class #name#_et : public _bz_StencilExpr T_numtype operator*() const { return #name#_stencilop(iter_, dim1_, dim2_); } - T_numtype operator()(_bz_typename _bz_IndexParameter >::type i) const + T_numtype operator()(typename _bz_IndexParameter >::type i) const { iter_.moveTo(i); return #name#_stencilop(iter_, dim1_, dim2_); } T_range_result operator()(const RectDomain& d) const @@ -1260,12 +1260,12 @@ class SliceInfo { } /* forward operations on arrays to main function */ template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(const Array& d1, int dim1, int dim2) { return #name#(d1.wrap(), dim1, dim2); } template -inline _bz_ArrayExpr<#name#_et<_bz_typename blitz::asExpr >::T_expr::T_range_result> > +inline _bz_ArrayExpr<#name#_et >::T_expr::T_range_result> > #name#(Array& d1, int dim1, int dim2) { return #name#(d1.wrap(), dim1, dim2); } """ diff --git a/blitz/globeval.cc 
b/blitz/globeval.cc index 799ac749..74ca37ec 100644 --- a/blitz/globeval.cc +++ b/blitz/globeval.cc @@ -1217,7 +1217,7 @@ _bz_evaluateWithTiled2DTraversal(T_dest& dest, T_expr expr, T_update) for (int i=bi; i < ni; ++i) { - _bz_typename T_expr::T_numtype tmp1, tmp2, tmp3; + typename T_expr::T_numtype tmp1, tmp2, tmp3; // Common subexpression elimination -- compilers // won't necessarily do this on their own. diff --git a/blitz/listinit.h b/blitz/listinit.h index be00cf97..4ae248e2 100644 --- a/blitz/listinit.h +++ b/blitz/listinit.h @@ -67,11 +67,11 @@ class ListInitializer { T_iterator iter_; }; -template +template class ListInitializationSwitch { public: - typedef _bz_typename T_array::T_numtype T_numtype; + typedef typename T_array::T_numtype T_numtype; ListInitializationSwitch(const ListInitializationSwitch& lis) : array_(lis.array_), value_(lis.value_), diff --git a/blitz/meta/CMakeLists.txt b/blitz/meta/CMakeLists.txt index a325f1bb..6103287b 100644 --- a/blitz/meta/CMakeLists.txt +++ b/blitz/meta/CMakeLists.txt @@ -1,3 +1,5 @@ -set(BLITZ_META_HEADERS dot.h matassign.h matmat.h matvec.h metaprog.h product.h sum.h vecassign.h) +add_library(Meta INTERFACE) +add_library(Blitz::Meta ALIAS Meta) -install(FILES ${BLITZ_META_HEADERS} DESTINATION include/blitz/meta) +#install(FILES dot.h matassign.h matmat.h matvec.h metaprog.h product.h sum.h vecassign.h + #DESTINATION include/blitz/meta) diff --git a/blitz/meta/dot.h b/blitz/meta/dot.h index f7ba9dac..ae4e3975 100644 --- a/blitz/meta/dot.h +++ b/blitz/meta/dot.h @@ -48,8 +48,8 @@ class _bz_meta_vectorDot { static const int loopFlag = (I < N-1) ? 
1 : 0; template - static inline BZ_PROMOTE(_bz_typename T_expr1::T_numtype, - _bz_typename T_expr2::T_numtype) + static inline BZ_PROMOTE(typename T_expr1::T_numtype, + typename T_expr2::T_numtype) f(const T_expr1& a, const T_expr2& b) { return a[I] * b[I] @@ -57,8 +57,8 @@ class _bz_meta_vectorDot { } template - static inline BZ_PROMOTE(_bz_typename T_expr1::T_numtype, - _bz_typename T_expr2::T_numtype) + static inline BZ_PROMOTE(typename T_expr1::T_numtype, + typename T_expr2::T_numtype) f_value_ref(T_expr1 a, const T_expr2& b) { return a[I] * b[I] @@ -66,8 +66,8 @@ class _bz_meta_vectorDot { } template - static inline BZ_PROMOTE(_bz_typename T_expr1::T_numtype, - _bz_typename T_expr2::T_numtype) + static inline BZ_PROMOTE(typename T_expr1::T_numtype, + typename T_expr2::T_numtype) f_ref_value(const T_expr1& a, T_expr2 b) { return a[I] * b[I] @@ -75,7 +75,7 @@ class _bz_meta_vectorDot { } template - static inline BZ_PROMOTE(_bz_typename T_expr1::T_numtype, + static inline BZ_PROMOTE(typename T_expr1::T_numtype, P_numtype2) dotWithArgs(const T_expr1& a, P_numtype2 i1, P_numtype2 i2=0, P_numtype2 i3=0, P_numtype2 i4=0, P_numtype2 i5=0, P_numtype2 i6=0, diff --git a/blitz/meta/product.h b/blitz/meta/product.h index 51075375..81711f9b 100644 --- a/blitz/meta/product.h +++ b/blitz/meta/product.h @@ -48,7 +48,7 @@ class _bz_meta_vectorProduct { static const int loopFlag = (I < N-1) ? 
1 : 0; template - static inline BZ_SUMTYPE(_bz_typename T_expr1::T_numtype) + static inline BZ_SUMTYPE(typename T_expr1::T_numtype) f(const T_expr1& a) { return a[I] * _bz_meta_vectorProduct - static inline BZ_SUMTYPE(_bz_typename T_expr1::T_numtype) + static inline BZ_SUMTYPE(typename T_expr1::T_numtype) f(const T_expr1& a) { return a[I] + diff --git a/blitz/numtrait.h b/blitz/numtrait.h index 233fa4d3..79c2d7d6 100644 --- a/blitz/numtrait.h +++ b/blitz/numtrait.h @@ -45,10 +45,10 @@ namespace blitz { #define BZ_SIGNEDTYPE(X) X #else -#define BZ_SUMTYPE(X) _bz_typename NumericTypeTraits::T_sumtype -#define BZ_DIFFTYPE(X) _bz_typename NumericTypeTraits::T_difftype -#define BZ_FLOATTYPE(X) _bz_typename NumericTypeTraits::T_floattype -#define BZ_SIGNEDTYPE(X) _bz_typename NumericTypeTraits::T_signedtype +#define BZ_SUMTYPE(X) typename NumericTypeTraits::T_sumtype +#define BZ_DIFFTYPE(X) typename NumericTypeTraits::T_difftype +#define BZ_FLOATTYPE(X) typename NumericTypeTraits::T_floattype +#define BZ_SIGNEDTYPE(X) typename NumericTypeTraits::T_signedtype template class NumericTypeTraits { diff --git a/blitz/promote.h b/blitz/promote.h index 2459234a..52b08f40 100644 --- a/blitz/promote.h +++ b/blitz/promote.h @@ -37,7 +37,7 @@ namespace blitz { #ifdef BZ_HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE - #define BZ_PROMOTE(A,B) _bz_typename blitz::promote_trait::T_promote + #define BZ_PROMOTE(A,B) typename blitz::promote_trait::T_promote #else #define BZ_PROMOTE(A,B) A #endif @@ -110,8 +110,8 @@ struct _bz_promote2 { template struct promote_trait { // Handle promotion of small integers to int/unsigned int - typedef _bz_typename autopromote_trait::T_numtype T1; - typedef _bz_typename autopromote_trait::T_numtype T2; + typedef typename autopromote_trait::T_numtype T1; + typedef typename autopromote_trait::T_numtype T2; // True if T1 is higher ranked static const bool @@ -157,7 +157,7 @@ struct promote_trait { // static const bool // promoteToT1 = knowBothRanks ? 
T1IsBetter : defaultPromotion; - typedef _bz_typename _bz_promote2::T_promote T_promote; + typedef typename _bz_promote2::T_promote T_promote; }; #else // !BZ_HAVE_PARTIAL_SPECIALIZATION diff --git a/blitz/tinymat2.cc b/blitz/tinymat2.cc index df7fafa3..2d321421 100644 --- a/blitz/tinymat2.cc +++ b/blitz/tinymat2.cc @@ -78,8 +78,8 @@ inline TinyMatrix& TinyMatrix::operator=(const ETBase& expr) { - _tm_evaluate(_bz_typename asExpr::T_expr(expr.unwrap()), - _bz_update::T_expr::T_result>()); + _tm_evaluate(typename asExpr::T_expr(expr.unwrap()), + _bz_update::T_expr::T_result>()); return *this; } @@ -89,8 +89,8 @@ TinyMatrix::operator=(const ETBase& expr) inline TinyMatrix& \ TinyMatrix::operator op(const T& expr) \ { \ - _tm_evaluate(_bz_typename asExpr::T_expr(expr), \ - name::T_expr::T_result>()); \ + _tm_evaluate(typename asExpr::T_expr(expr), \ + name::T_expr::T_result>()); \ return *this; \ } diff --git a/blitz/tinyvec.cc b/blitz/tinyvec.cc index 7902cfb7..ba98a6bc 100644 --- a/blitz/tinyvec.cc +++ b/blitz/tinyvec.cc @@ -77,7 +77,7 @@ inline void TinyVector::_bz_assign(P_expr expr, P_updater u template template inline TinyVector::TinyVector(_bz_VecExpr expr) { - _bz_assign(expr, _bz_update()); + _bz_assign(expr, _bz_update()); } /***************************************************************************** @@ -88,7 +88,7 @@ template template inline TinyVector& TinyVector::operator=(_bz_VecExpr expr) { - _bz_assign(expr, _bz_update()); + _bz_assign(expr, _bz_update()); return *this; } @@ -97,7 +97,7 @@ inline TinyVector& TinyVector::operator+=(_bz_VecExpr expr) { _bz_assign(expr, _bz_plus_update()); + typename P_expr::T_numtype>()); return *this; } @@ -106,7 +106,7 @@ inline TinyVector& TinyVector::operator-=(_bz_VecExpr expr) { _bz_assign(expr, _bz_minus_update()); + typename P_expr::T_numtype>()); return *this; } @@ -115,7 +115,7 @@ inline TinyVector& TinyVector::operator*=(_bz_VecExpr expr) { _bz_assign(expr, _bz_multiply_update()); + typename 
P_expr::T_numtype>()); return *this; } @@ -124,7 +124,7 @@ inline TinyVector& TinyVector::operator/=(_bz_VecExpr expr) { _bz_assign(expr, _bz_divide_update()); + typename P_expr::T_numtype>()); return *this; } @@ -133,7 +133,7 @@ inline TinyVector& TinyVector::operator%=(_bz_VecExpr expr) { _bz_assign(expr, _bz_mod_update()); + typename P_expr::T_numtype>()); return *this; } @@ -142,7 +142,7 @@ inline TinyVector& TinyVector::operator^=(_bz_VecExpr expr) { _bz_assign(expr, _bz_xor_update()); + typename P_expr::T_numtype>()); return *this; } @@ -151,7 +151,7 @@ inline TinyVector& TinyVector::operator&=(_bz_VecExpr expr) { _bz_assign(expr, _bz_bitand_update()); + typename P_expr::T_numtype>()); return *this; } @@ -160,7 +160,7 @@ inline TinyVector& TinyVector::operator|=(_bz_VecExpr expr) { _bz_assign(expr, _bz_bitor_update()); + typename P_expr::T_numtype>()); return *this; } @@ -169,7 +169,7 @@ inline TinyVector& TinyVector::operator<<=(_bz_VecExpr expr) { _bz_assign(expr, _bz_shiftl_update()); + typename P_expr::T_numtype>()); return *this; } @@ -178,7 +178,7 @@ inline TinyVector& TinyVector::operator>>=(_bz_VecExpr expr) { _bz_assign(expr, _bz_shiftr_update()); + typename P_expr::T_numtype>()); return *this; } @@ -299,7 +299,7 @@ TinyVector::operator>>=(const int x) template template inline TinyVector& TinyVector::operator=(const TinyVector& x) { - (*this) = _bz_VecExpr<_bz_typename + (*this) = _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -307,7 +307,7 @@ TinyVector::operator=(const TinyVector template inline TinyVector& TinyVector::operator+=(const TinyVector& x) { - (*this) += _bz_VecExpr<_bz_typename + (*this) += _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -315,7 +315,7 @@ TinyVector::operator+=(const TinyVector template inline TinyVector& TinyVector::operator-=(const TinyVector& x) { - (*this) -= _bz_VecExpr<_bz_typename + (*this) -= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -323,7 +323,7 @@ 
TinyVector::operator-=(const TinyVector template inline TinyVector& TinyVector::operator*=(const TinyVector& x) { - (*this) *= _bz_VecExpr<_bz_typename + (*this) *= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -331,7 +331,7 @@ TinyVector::operator*=(const TinyVector template inline TinyVector& TinyVector::operator/=(const TinyVector& x) { - (*this) /= _bz_VecExpr<_bz_typename + (*this) /= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -339,7 +339,7 @@ TinyVector::operator/=(const TinyVector template inline TinyVector& TinyVector::operator%=(const TinyVector& x) { - (*this) %= _bz_VecExpr<_bz_typename + (*this) %= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -347,7 +347,7 @@ TinyVector::operator%=(const TinyVector template inline TinyVector& TinyVector::operator^=(const TinyVector& x) { - (*this) ^= _bz_VecExpr<_bz_typename + (*this) ^= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -355,7 +355,7 @@ TinyVector::operator^=(const TinyVector template inline TinyVector& TinyVector::operator&=(const TinyVector& x) { - (*this) &= _bz_VecExpr<_bz_typename + (*this) &= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -363,7 +363,7 @@ TinyVector::operator&=(const TinyVector template inline TinyVector& TinyVector::operator|=(const TinyVector& x) { - (*this) |= _bz_VecExpr<_bz_typename + (*this) |= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -371,7 +371,7 @@ TinyVector::operator|=(const TinyVector template inline TinyVector& TinyVector::operator<<=(const TinyVector& x) { - (*this) <<= _bz_VecExpr<_bz_typename + (*this) <<= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } @@ -379,7 +379,7 @@ TinyVector::operator<<=(const TinyVector template inline TinyVector& TinyVector::operator>>=(const TinyVector& x) { - (*this) >>= _bz_VecExpr<_bz_typename + (*this) >>= _bz_VecExpr::T_constIterator>(x.beginFast()); return *this; } diff --git a/blitz/tinyvec2.cc 
b/blitz/tinyvec2.cc index 399db599..43277071 100644 --- a/blitz/tinyvec2.cc +++ b/blitz/tinyvec2.cc @@ -83,10 +83,10 @@ _bz_forceinline TinyVector& TinyVector::operator=(const ETBase& expr) { - _tv_evaluate(_bz_typename asExpr::T_expr(expr.unwrap()), + _tv_evaluate(typename asExpr::T_expr(expr.unwrap()), _bz_update< T_numtype, - _bz_typename asExpr::T_expr::T_result>()); + typename asExpr::T_expr::T_result>()); return *this; } @@ -97,9 +97,9 @@ TinyVector::operator=(const ETBase& expr) TinyVector& \ TinyVector::operator op(const T& expr) \ { \ - _tv_evaluate(_bz_typename asExpr::T_expr(expr), \ + _tv_evaluate(typename asExpr::T_expr(expr), \ name::T_expr::T_result>()); \ + typename asExpr::T_expr::T_result>()); \ return *this; \ } diff --git a/blitz/traversal.cc b/blitz/traversal.cc index 54df5d90..0b1bed4b 100644 --- a/blitz/traversal.cc +++ b/blitz/traversal.cc @@ -42,7 +42,7 @@ namespace blitz { static set > *_bz_intel_kludge; //template -//_bz_typename TraversalOrderCollection::T_set +//typename TraversalOrderCollection::T_set // TraversalOrderCollection::traversals_; template diff --git a/blitz/traversal.h b/blitz/traversal.h index 71182574..f797cd4f 100644 --- a/blitz/traversal.h +++ b/blitz/traversal.h @@ -113,9 +113,9 @@ template class TraversalOrderCollection { public: typedef TraversalOrder T_traversal; - typedef _bz_typename T_traversal::T_coord T_coord; + typedef typename T_traversal::T_coord T_coord; typedef set T_set; - typedef _bz_typename set::const_iterator T_iterator; + typedef typename set::const_iterator T_iterator; const T_traversal* find(const T_coord& size) { @@ -135,7 +135,7 @@ class TraversalOrderCollection { }; template -_bz_typename TraversalOrderCollection::T_set +typename TraversalOrderCollection::T_set TraversalOrderCollection::traversals_; /* diff --git a/cmake/BlitzConfigFileName.cmake b/cmake/BlitzConfigFileName.cmake deleted file mode 100644 index 85ad9dca..00000000 --- a/cmake/BlitzConfigFileName.cmake +++ /dev/null @@ -1,40 +0,0 
@@ -if (NOT BLITZ_CONFIG_FILE) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - set(BZ_CONFIG_FILE "llvm") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "ARMCC") - set(BZ_CONFIG_FILE "arm") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(BZ_CONFIG_FILE "llvm") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Cray") - set(BZ_CONFIG_FILE "cray") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(BZ_CONFIG_FILE "gnu") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GHS") - set(BZ_CONFIG_FILE "ghs") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "HP") - set(BZ_CONFIG_FILE "hp") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") - set(BZ_CONFIG_FILE "intel") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MIPSpro") - set(BZ_CONFIG_FILE "sgi") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") - set(BZ_CONFIG_FILE "ms") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "OpenWatcom") - set(BZ_CONFIG_FILE "openwatcom") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") - set(BZ_CONFIG_FILE "pgi") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PathScale") - set(BZ_CONFIG_FILE "pathscale") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "SunPro") - set(BZ_CONFIG_FILE "sun") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "XL") - set(BZ_CONFIG_FILE "ibm") - elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL ".*KCC.*") - set(BZ_CONFIG_FILE "kai") - endif() - - set(BLITZ_CONFIG_INSTALL_DIR blitz/${BZ_CONFIG_FILE}) - set(BLITZ_CONFIG_DIR "${CMAKE_CURRENT_BINARY_DIR}/${BLITZ_CONFIG_INSTALL_DIR}") - file(MAKE_DIRECTORY ${BLITZ_CONFIG_DIR}) - set(BLITZ_CONFIG_FILE "${BLITZ_CONFIG_DIR}/bzconfig.h") -endif() diff --git a/cmake/CheckCXXFeatures.cmake b/cmake/CheckCXXFeatures.cmake deleted file mode 100644 index ef8342bc..00000000 --- a/cmake/CheckCXXFeatures.cmake +++ /dev/null @@ -1,116 +0,0 @@ -set(CHECK_CXX_FEATURE_PREFIX "") -MARK_AS_ADVANCED(CHECK_CXX_FEATURE_PREFIX) - -macro(CHECK_CXX_FEATURE feature file message) - message(STATUS "Check whether the compiler 
${message}") - try_compile(RESULT ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/cmake/cxx_tests/${file} - COMPILE_DEFINITIONS "${CHECK_CXX_FEATURE_DEFINITIONS}" - OUTPUT_VARIABLE OUTPUT) - - if (RESULT) - set (FOUND "found") - set (STATUS "passed") - set(CHECK_CXX_FEATURE_DEFINITIONS "${CHECK_CXX_FEATURE_DEFINITIONS} -D${feature}") - else() - set (FOUND "not found") - set (STATUS "failed") - endif() - message(STATUS "Check whether the compiler ${message} - ${FOUND}") - set("${CHECK_CXX_FEATURE_PREFIX}${feature}" ${RESULT} CACHE INTERNAL "Does the compiler ${message}") - file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log - "Determining if the CXX compiler ${message} ${STATUS} with " - "the following output:\n${OUTPUT}\n\n") -endmacro() - -macro(CHECK_CXX_GENERAL) - CHECK_CXX_FEATURE(HAVE_EXCEPTIONS exceptions.cpp "supports exceptions") - CHECK_CXX_FEATURE(HAVE_RTTI rtti.cpp "supports Run-Time Type Identification") - CHECK_CXX_FEATURE(HAVE_MEMBER_CONSTANTS member_constants.cpp "supports member constants") - CHECK_CXX_FEATURE(HAVE_OLD_FOR_SCOPING old_for_scoping.cpp "accepts the old for scoping rules") -endmacro() - -macro(CHECK_CXX_KEYWORDS) - CHECK_CXX_FEATURE(HAVE_EXPLICIT explicit.cpp "supports the explicit keyword") - CHECK_CXX_FEATURE(HAVE_MUTABLE mutable.cpp "supports the mutable keyword") - CHECK_CXX_FEATURE(HAVE_TYPENAME typename.cpp "recognizes typename") - CHECK_CXX_FEATURE(HAVE_NCEG_RESTRICT nceg_restrict.cpp "supports the Numerical C Extensions Group restrict keyword") - CHECK_CXX_FEATURE(HAVE_RESTRICT_EGCS restrict_egcs.cpp "recognizes the '__restrict__' keyword") - CHECK_CXX_FEATURE(HAVE_BOOL bool.cpp "recognizes bool as a built-in type") -endmacro() - -macro(CHECK_CXX_TYPE_CASTS) - CHECK_CXX_FEATURE(HAVE_CONST_CAST const_cast.cpp "supports const_cast<>") - CHECK_CXX_FEATURE(HAVE_STATIC_CAST static_cast.cpp "supports static_cast<>") - CHECK_CXX_FEATURE(HAVE_REINTERPRET_CAST reinterpret_cast.cpp "supports reinterpret_cast<>") - 
CHECK_CXX_FEATURE(HAVE_DYNAMIC_CAST dynamic_cast.cpp "supports dynamic_cast<>") -endmacro() - -macro(CHECK_CXX_TEMPLATES_FEATURES) - CHECK_CXX_FEATURE(HAVE_TEMPLATES templates.cpp "supports basic templates") - CHECK_CXX_FEATURE(HAVE_PARTIAL_SPECIALIZATION partial_specialization.cpp "supports partial specialization") - CHECK_CXX_FEATURE(HAVE_PARTIAL_ORDERING partial_ordering.cpp "supports partial ordering") - CHECK_CXX_FEATURE(HAVE_DEFAULT_TEMPLATE_PARAMETERS default_template_parameters.cpp "supports default template parameters") - CHECK_CXX_FEATURE(HAVE_MEMBER_TEMPLATES member_templates.cpp "supports member templates") - CHECK_CXX_FEATURE(HAVE_MEMBER_TEMPLATES_OUTSIDE_CLASS member_templates_outside_class.cpp "supports member templates outside the class declaration") - CHECK_CXX_FEATURE(HAVE_FULL_SPECIALIZATION_SYNTAX full_specialization_syntax.cpp "recognizes the full specialization syntax") - CHECK_CXX_FEATURE(HAVE_FUNCTION_NONTYPE_PARAMETERS function_nontype_parameters.cpp "supports function templates with non-type parameters") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_QUALIFIED_BASE_CLASS template_qualified_base_class.cpp "supports template-qualified base class specifiers") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_QUALIFIED_RETURN_TYPE template_qualified_return_type.cpp "supports template-qualified return types") - CHECK_CXX_FEATURE(HAVE_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION explicit_template_function_qualification.cpp "supports explicit template function qualification") - CHECK_CXX_FEATURE(HAVE_TEMPLATES_AS_TEMPLATE_ARGUMENTS templates_as_template_arguments.cpp "supports templates as template arguments") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_KEYWORD_QUALIFIER template_keyword_qualifier.cpp "supports use of the template keyword as a qualifier") - CHECK_CXX_FEATURE(HAVE_TEMPLATE_SCOPED_ARGUMENT_MATCHING template_scoped_argument_matching.cpp "supports function matching with argument types which are template scope-qualified") - CHECK_CXX_FEATURE(HAVE_TYPE_PROMOTION 
type_promotion.cpp "support the vector type promotion mechanism") - CHECK_CXX_FEATURE(HAVE_USE_NUMTRAIT use_numtrait.cpp "supports numeric traits promotions") - CHECK_CXX_FEATURE(HAVE_ENUM_COMPUTATIONS enum_computations.cpp "handles computations inside an enum") - CHECK_CXX_FEATURE(HAVE_ENUM_COMPUTATIONS_WITH_CAST enum_computations_with_cast.cpp "handles (int) casts in enum computations") -endmacro() - -macro(CHECK_HEADER variable name) - find_path(FOUND ${name} "/usr/include;/usr/include/c++") - if (FOUND) - set(value TRUE) - else() - set(value FALSE) - endif() - set("${CHECK_CXX_FEATURE_PREFIX}${variable}" ${value} CACHE INTERNAL "Whether the header ${name} exists.") -endmacro() - -macro(CHECK_CXX_STANDARD_LIBRARY) - CHECK_CXX_FEATURE(HAVE_COMPLEX have_complex.cpp "has complex") - CHECK_CXX_FEATURE(HAVE_COMPLEX_FCNS have_complex_fcns.cpp "has standard complex functions") - CHECK_HEADER(HAVE_CSTRING cstring) - CHECK_CXX_FEATURE(HAVE_NUMERIC_LIMITS have_numeric_limits.cpp "has numeric_limits") - CHECK_CXX_FEATURE(HAVE_CLIMITS have_climits.cpp "has header") - CHECK_CXX_FEATURE(HAVE_VALARRAY have_valarray.cpp "has valarray") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH1 have_complex_math1.cpp "has complex math functions") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH2 have_complex_math2.cpp "has more complex math functions") - CHECK_CXX_FEATURE(HAVE_IEEE_MATH have_ieee_math.cpp "supports IEEE math library") - CHECK_CXX_FEATURE(HAVE_SYSTEM_V_MATH have_system_v_math.cpp "supports System V math library") - CHECK_CXX_FEATURE(HAVE_MATH_FN_IN_NAMESPACE_STD math_fn_in_namespace_std.cpp "has C math functions in namespace std") - CHECK_CXX_FEATURE(HAVE_MATH_ABSINT_IN_NAMESPACE_STD math_absint_in_namespace_std.cpp "has C math abs(integer type) in namespace std") - CHECK_CXX_FEATURE(HAVE_COMPLEX_MATH_IN_NAMESPACE_STD complex_math_in_namespace_std.cpp "supports complex math functions are in namespace std") - CHECK_CXX_FEATURE(HAVE_ISNAN_IN_NAMESPACE_STD isnan_in_namespace_std.cpp "has isnan 
function in namespace std") - CHECK_CXX_FEATURE(HAVE_ISNORMAL_IN_NAMESPACE_STD isnormal_in_namespace_std.cpp "has isnormal function in namespace std") - CHECK_CXX_FEATURE(HAVE_STD have_std.cpp "supports ISO C++ standard library") - CHECK_CXX_FEATURE(HAVE_STL have_stl.cpp "supports Standard Template Library") - CHECK_CXX_FEATURE(HAVE_RUSAGE have_rusage.cpp "has getrusage() function") -endmacro() - -macro(CHECK_ALL_CXX_FEATURES) - set(CHECK_CXX_FEATURE_PREFIX ${ARGN}) - CHECK_CXX_GENERAL() - CHECK_CXX_KEYWORDS() - CHECK_CXX_TYPE_CASTS() - CHECK_CXX_TEMPLATES_FEATURES() - CHECK_CXX_STANDARD_LIBRARY() -endmacro() - -macro(CHECK_ALIGNMENT_DIRECTIVE) - set(CHECK_CXX_FEATURE_PREFIX ${ARGN}) - CHECK_CXX_FEATURE(HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE alignment_directive_win.cpp "has windows style alignment directives") - if (NOT ${CHECK_CXX_FEATURE_PREFIX}HAVE_ALIGNEMENT_DIRECTIVE_WINDOWS_STYLE STREQUAL "TRUE") - CHECK_CXX_FEATURE(HAVE_ALIGNEMENT_DIRECTIVE_GCC_STYLE alignment_directive_gcc.cpp "has gcc style alignment directives") - endif() -endmacro() diff --git a/cmake/GetGitRevisionDescription.cmake b/cmake/GetGitRevisionDescription.cmake new file mode 100644 index 00000000..87f691ad --- /dev/null +++ b/cmake/GetGitRevisionDescription.cmake @@ -0,0 +1,284 @@ +# - Returns a version string from Git +# +# These functions force a re-configure on each git commit so that you can +# trust the values of the variables in your build system. +# +# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR]) +# +# Returns the refspec and sha hash of the current head revision +# +# git_describe(<var> [<additional arguments to git describe> ...]) +# +# Returns the results of git describe on the source tree, and adjusting +# the output so that it tests false if an error occurs. +# +# git_describe_working_tree(<var> [<additional arguments to git describe> ...]) +# +# Returns the results of git describe on the working tree (--dirty option), +# and adjusting the output so that it tests false if an error occurs.
+# +# git_get_exact_tag(<var> [<additional arguments to git describe> ...]) +# +# Returns the results of git describe --exact-match on the source tree, +# and adjusting the output so that it tests false if there was no exact +# matching tag. +# +# git_local_changes(<var>) +# +# Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes. +# Uses the return code of "git diff-index --quiet HEAD --". +# Does not regard untracked files. +# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2020 Ryan Pavlik +# http://academic.cleardefinition.com +# +# Copyright 2009-2013, Iowa State University. +# Copyright 2013-2020, Ryan Pavlik +# Copyright 2013-2020, Contributors +# SPDX-License-Identifier: BSL-1.0 +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) + +if(__get_git_revision_description) + return() +endif() +set(__get_git_revision_description YES) + +# We must run the following at "include" time, not at function call time, +# to find the path to this module rather than the path to a calling list file +get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH) + +# Function _git_find_closest_git_dir finds the next closest .git directory +# that is part of any directory in the path defined by _start_dir. +# The result is returned in the parent scope variable whose name is passed +# as variable _git_dir_var. If no .git directory can be found, the +# function returns an empty string via _git_dir_var. +# +# Example: Given a path C:/bla/foo/bar and assuming C:/bla/.git exists and +# neither foo nor bar contain a file/directory .git.
This will return +# C:/bla/.git +# +function(_git_find_closest_git_dir _start_dir _git_dir_var) + set(cur_dir "${_start_dir}") + set(git_dir "${_start_dir}/.git") + while(NOT EXISTS "${git_dir}") + # .git dir not found, search parent directories + set(git_previous_parent "${cur_dir}") + get_filename_component(cur_dir ${cur_dir} DIRECTORY) + if(cur_dir STREQUAL git_previous_parent) + # We have reached the root directory, we are not in git + set(${_git_dir_var} + "" + PARENT_SCOPE) + return() + endif() + set(git_dir "${cur_dir}/.git") + endwhile() + set(${_git_dir_var} + "${git_dir}" + PARENT_SCOPE) +endfunction() + +function(get_git_head_revision _refspecvar _hashvar) + _git_find_closest_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR) + + if("${ARGN}" STREQUAL "ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR") + set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE) + else() + set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE) + endif() + if(NOT "${GIT_DIR}" STREQUAL "") + file(RELATIVE_PATH _relative_to_source_dir "${CMAKE_SOURCE_DIR}" + "${GIT_DIR}") + if("${_relative_to_source_dir}" MATCHES "[.][.]" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR) + # We've gone above the CMake root dir. + set(GIT_DIR "") + endif() + endif() + if("${GIT_DIR}" STREQUAL "") + set(${_refspecvar} + "GITDIR-NOTFOUND" + PARENT_SCOPE) + set(${_hashvar} + "GITDIR-NOTFOUND" + PARENT_SCOPE) + return() + endif() + + # Check if the current source dir is a git submodule or a worktree. + # In both cases .git is a file instead of a directory. + # + if(NOT IS_DIRECTORY ${GIT_DIR}) + # The following git command will return a non empty string that + # points to the super project working tree if the current + # source dir is inside a git submodule. + # Otherwise the command will return an empty string.
+ # + execute_process( + COMMAND "${GIT_EXECUTABLE}" rev-parse + --show-superproject-working-tree + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + OUTPUT_VARIABLE out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT "${out}" STREQUAL "") + # If out is empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule + file(READ ${GIT_DIR} submodule) + string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE + ${submodule}) + string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE) + get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH) + get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} + ABSOLUTE) + set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD") + else() + # GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree + file(READ ${GIT_DIR} worktree_ref) + # The .git directory contains a path to the worktree information directory + # inside the parent git repo of the worktree. + # + string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir + ${worktree_ref}) + string(STRIP ${git_worktree_dir} git_worktree_dir) + _git_find_closest_git_dir("${git_worktree_dir}" GIT_DIR) + set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD") + endif() + else() + set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD") + endif() + set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data") + if(NOT EXISTS "${GIT_DATA}") + file(MAKE_DIRECTORY "${GIT_DATA}") + endif() + + if(NOT EXISTS "${HEAD_SOURCE_FILE}") + return() + endif() + set(HEAD_FILE "${GIT_DATA}/HEAD") + configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY) + + configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in" + "${GIT_DATA}/grabRef.cmake" @ONLY) + include("${GIT_DATA}/grabRef.cmake") + + set(${_refspecvar} + "${HEAD_REF}" + PARENT_SCOPE) + set(${_hashvar} + "${HEAD_HASH}" + PARENT_SCOPE) +endfunction() + +function(git_describe _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + get_git_head_revision(refspec hash) + if(NOT GIT_FOUND) + set(${_var} + "GIT-NOTFOUND" + PARENT_SCOPE) + return() + endif() 
+ if(NOT hash) + set(${_var} + "HEAD-HASH-NOTFOUND" + PARENT_SCOPE) + return() + endif() + + # TODO sanitize + #if((${ARGN}" MATCHES "&&") OR + # (ARGN MATCHES "||") OR + # (ARGN MATCHES "\\;")) + # message("Please report the following error to the project!") + # message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}") + #endif() + + #message(STATUS "Arguments to execute_process: ${ARGN}") + + execute_process( + COMMAND "${GIT_EXECUTABLE}" describe --tags --always ${hash} ${ARGN} + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE res + OUTPUT_VARIABLE out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT res EQUAL 0) + set(out "${out}-${res}-NOTFOUND") + endif() + + set(${_var} + "${out}" + PARENT_SCOPE) +endfunction() + +function(git_describe_working_tree _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + if(NOT GIT_FOUND) + set(${_var} + "GIT-NOTFOUND" + PARENT_SCOPE) + return() + endif() + + execute_process( + COMMAND "${GIT_EXECUTABLE}" describe --dirty ${ARGN} + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE res + OUTPUT_VARIABLE out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT res EQUAL 0) + set(out "${out}-${res}-NOTFOUND") + endif() + + set(${_var} + "${out}" + PARENT_SCOPE) +endfunction() + +function(git_get_exact_tag _var) + git_describe(out --exact-match ${ARGN}) + set(${_var} + "${out}" + PARENT_SCOPE) +endfunction() + +function(git_local_changes _var) + if(NOT GIT_FOUND) + find_package(Git QUIET) + endif() + get_git_head_revision(refspec hash) + if(NOT GIT_FOUND) + set(${_var} + "GIT-NOTFOUND" + PARENT_SCOPE) + return() + endif() + if(NOT hash) + set(${_var} + "HEAD-HASH-NOTFOUND" + PARENT_SCOPE) + return() + endif() + + execute_process( + COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD -- + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE res + OUTPUT_VARIABLE out + ERROR_QUIET 
OUTPUT_STRIP_TRAILING_WHITESPACE) + if(res EQUAL 0) + set(${_var} + "CLEAN" + PARENT_SCOPE) + else() + set(${_var} + "DIRTY" + PARENT_SCOPE) + endif() +endfunction() diff --git a/cmake/GetGitRevisionDescription.cmake.in b/cmake/GetGitRevisionDescription.cmake.in new file mode 100644 index 00000000..116efc4e --- /dev/null +++ b/cmake/GetGitRevisionDescription.cmake.in @@ -0,0 +1,43 @@ +# +# Internal file for GetGitRevisionDescription.cmake +# +# Requires CMake 2.6 or newer (uses the 'function' command) +# +# Original Author: +# 2009-2010 Ryan Pavlik +# http://academic.cleardefinition.com +# Iowa State University HCI Graduate Program/VRAC +# +# Copyright 2009-2012, Iowa State University +# Copyright 2011-2015, Contributors +# Distributed under the Boost Software License, Version 1.0. +# (See accompanying file LICENSE_1_0.txt or copy at +# http://www.boost.org/LICENSE_1_0.txt) +# SPDX-License-Identifier: BSL-1.0 + +set(HEAD_HASH) + +file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) + +string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) +if(HEAD_CONTENTS MATCHES "ref") + # named branch + string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") + if(EXISTS "@GIT_DIR@/${HEAD_REF}") + configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) + else() + configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY) + file(READ "@GIT_DATA@/packed-refs" PACKED_REFS) + if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}") + set(HEAD_HASH "${CMAKE_MATCH_1}") + endif() + endif() +else() + # detached HEAD + configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) +endif() + +if(NOT HEAD_HASH) + file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) + string(STRIP "${HEAD_HASH}" HEAD_HASH) +endif() diff --git a/cmake/blitzGitSHA1.cc.in b/cmake/blitzGitSHA1.cc.in new file mode 100644 index 00000000..3704b4d7 --- /dev/null +++ b/cmake/blitzGitSHA1.cc.in @@ -0,0 +1,3 @@ +#define BLITZ_GIT_SHA1 "@BLITZ_GIT_SHA1@" + +extern const char blitz_GIT_SHA1[] = 
BLITZ_GIT_SHA1; diff --git a/cmake/cxx_tests/alignment_directive_gcc.cpp b/cmake/cxx_tests/alignment_directive_gcc.cpp deleted file mode 100644 index 385a3255..00000000 --- a/cmake/cxx_tests/alignment_directive_gcc.cpp +++ /dev/null @@ -1,4 +0,0 @@ -int main() { - int __attribute__ ((aligned (16))) var; - var=0; -} diff --git a/cmake/cxx_tests/alignment_directive_win.cpp b/cmake/cxx_tests/alignment_directive_win.cpp deleted file mode 100644 index 7fdf0b80..00000000 --- a/cmake/cxx_tests/alignment_directive_win.cpp +++ /dev/null @@ -1,4 +0,0 @@ -int main() { - __declspec(align(16)) int var; - var=0; -} diff --git a/cmake/cxx_tests/bool.cpp b/cmake/cxx_tests/bool.cpp deleted file mode 100644 index 122cd5b4..00000000 --- a/cmake/cxx_tests/bool.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler recognizes bool as a separate built-in type. -// Note that a typedef is not a separate type since you cannot overload a function such -// that it accepts either the basic type or the typedef. -// In sync with AC_CXX_BOOL (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -int f(int x) { return 1; } -int f(char x) { return 1; } -int f(bool x) { return 1; } - -int main() { - bool b = true; - return f(b); -} diff --git a/cmake/cxx_tests/compile_stdcxx_0x.cpp b/cmake/cxx_tests/compile_stdcxx_0x.cpp deleted file mode 100644 index e602d642..00000000 --- a/cmake/cxx_tests/compile_stdcxx_0x.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// DESCRIPTION -// -// Check for baseline language coverage in the compiler for the C++0x -// standard. -// In sync with AC_CXX_COMPILE_STDCXX_0X (2008-04-17). 
-// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template -struct check { - static_assert(sizeof(int) <= sizeof(T), "not big enough"); -}; - -typedef check> right_angle_brackets; - -int a; -decltype(a) b; - -typedef check check_type; -check_type c; -check_type&& cr = c; - -int main() { } diff --git a/cmake/cxx_tests/compiler_vendor.cpp b/cmake/cxx_tests/compiler_vendor.cpp deleted file mode 100644 index ddbab551..00000000 --- a/cmake/cxx_tests/compiler_vendor.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// DESCRIPTION -// -// Set VENDOR-NAME to the lower-case name of the compiler vendor or -// `unknown' if the compiler's vendor is unknown. `compaq' means the CXX -// compiler as available on Tru64/OSF1/Digital Unix on Alpha machines. If -// NICKNAME is provided, set it to the compiler's usual name (eg. `g++', -// `cxx', `aCC', etc.). -// In sync with AC_CXX_COMPILER_VENDOR(VENDOR-NAME, [NICKNAME]) (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Ludovic Courtès -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -AC_DEFUN([AC_CXX_COMPILER_VENDOR], - [AC_REQUIRE([AC_PROG_CXX]) - AC_REQUIRE([AC_PROG_CXXCPP]) - AC_CACHE_CHECK([the C++ compiler vendor], - [ac_cv_cxx_compiler_vendor], - - [AC_LANG_PUSH([C++]) - - dnl GNU C++ - _AC_C_IFDEF([__GNUG__], - [ac_cv_cxx_compiler_vendor=gnu], - [_AC_C_IFDEF([__DECCXX], - [ac_cv_cxx_compiler_vendor=compaq], - [dnl HP's aCC - _AC_C_IFDEF([__HP_aCC], - [ac_cv_cxx_compiler_vendor=hp], - [dnl SGI CC - _AC_C_IFDEF([__sgi], - [ac_cv_cxx_compiler_vendor=sgi], - [dnl Note: We are using the C compiler because VC++ doesn't - dnl recognize `.cc'(which is used by `configure') as a C++ file - dnl extension and requires `/TP' to be passed. - AC_LANG_PUSH([C]) - _AC_C_IFDEF([_MSC_VER], - [ac_cv_cxx_compiler_vendor=microsoft], - [ac_cv_cxx_compiler_vendor=unknown]) - AC_LANG_POP()])])])]) - - AC_LANG_POP()]) - $1="$ac_cv_cxx_compiler_vendor" - - dnl The compiler nickname - ifelse([$2], , [], - [case "$ac_cv_cxx_compiler_vendor" in - gnu) $2=g++;; - compaq) $2=cxx;; - hp) $2=aCC;; - sgi) $2=CC;; - microsoft) $2=cl;; - *) $2=unknown;; - esac])])dnl diff --git a/cmake/cxx_tests/complex_math_in_namespace_std.cpp b/cmake/cxx_tests/complex_math_in_namespace_std.cpp deleted file mode 100644 index 7cf6b3cc..00000000 --- a/cmake/cxx_tests/complex_math_in_namespace_std.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C math functions are in the cmath header file and std:: -// namespace, define HAVE_MATH_FN_IN_NAMESPACE_STD. -// In sync with AC_CXX_COMPLEX_MATH_IN_NAMESPACE_STD (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -namespace S { - using namespace std; - complex pow(complex x, complex y) { return std::pow(x,y); } -} - -int main() { - using namespace S; - complex x = 1.0, y = 1.0; - S::pow(x,y); - return 0; -} diff --git a/cmake/cxx_tests/const_cast.cpp b/cmake/cxx_tests/const_cast.cpp deleted file mode 100644 index 7ccd4748..00000000 --- a/cmake/cxx_tests/const_cast.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports const_cast<>. -// In sync with AC_CXX_CONST_CAST (2008-04-12) -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -int main() { - int x = 0; - const int& y = x; - int& z = const_cast(y); - return z; -} diff --git a/cmake/cxx_tests/cppflags_std_lang.cpp b/cmake/cxx_tests/cppflags_std_lang.cpp deleted file mode 100644 index 1d9505bf..00000000 --- a/cmake/cxx_tests/cppflags_std_lang.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// DESCRIPTION -// -// Append to CPP-FLAGS the set of flags that should be passed to the C++ -// preprocessor in order to enable use of C++ features as defined in the -// ANSI C++ standard (eg. use of standard iostream classes in the `std' -// namespace, etc.). -// In sync with AC_CXX_CPPFLAGS_STD_LANG(CPP-FLAGS) (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Ludovic Courtès -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -AC_DEFUN([AC_CXX_CPPFLAGS_STD_LANG], - [AC_REQUIRE([AC_CXX_COMPILER_VENDOR]) - case "$ac_cv_cxx_compiler_vendor" in - compaq) - // By default, Compaq CXX has an iostream classes implementation - // that is _not_ in the `std' namespace. - $1="$$1 -D__USE_STD_IOSTREAM=1";; - hp) - // When `configure' looks for a C++ header (eg. ) it invokes - // `aCC -E $CPPFLAGS'. However, including standard C++ headers like - // fails if `-AA' is not supplied. - $1="$$1 -AA";; - esac]) diff --git a/cmake/cxx_tests/cxxflags_std_lang.cpp b/cmake/cxx_tests/cxxflags_std_lang.cpp deleted file mode 100644 index bfb7f377..00000000 --- a/cmake/cxx_tests/cxxflags_std_lang.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Append to variable CXX-FLAGS the set of compile-time flags that should -// be passed to the C++ compiler in order to enable use of C++ features as -// defined in the ANSI C++ standard (eg. use of standard iostream classes -// in the `std' namespace, etc.). Note that if you use GNU Libtool you may -// need to prefix each of those flags with `-Xcompiler' so that Libtool -// doesn't discard them (see Libtool's manual and `AC_LIBTOOLIZE_CFLAGS'). -// In sync with AC_CXX_CXXFLAGS_STD_LANG(CXX-FLAGS) (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Ludovic Courtès -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -AC_DEFUN([AC_CXX_CXXFLAGS_STD_LANG], - [AC_REQUIRE([AC_CXX_COMPILER_VENDOR]) - case "$ac_cv_cxx_compiler_vendor" in - sgi) $1="$$1 -LANG:std -exceptions";; - hp) $1="$$1 -AA";; - esac]) diff --git a/cmake/cxx_tests/default_template_parameters.cpp b/cmake/cxx_tests/default_template_parameters.cpp deleted file mode 100644 index 0db5da5c..00000000 --- a/cmake/cxx_tests/default_template_parameters.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports default template parameters. -// In sync with AC_CXX_DEFAULT_TEMPLATE_PARAMETERS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template -class A { -public: - int f() {return 0;} -}; - -int main() { - A a; - return a.f(); -} diff --git a/cmake/cxx_tests/dtor_after_atexit.cpp b/cmake/cxx_tests/dtor_after_atexit.cpp deleted file mode 100644 index 8211b779..00000000 --- a/cmake/cxx_tests/dtor_after_atexit.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler calls global destructors after atexit functions, -// define HAVE_DTOR_AFTER_ATEXIT. WARNING: If cross-compiling, the test -// cannot be performed, the default action is to define -// HAVE_DTOR_AFTER_ATEXIT. -// In sync with AC_CXX_DTOR_AFTER_ATEXIT (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -#include - -static int dtor_called = 0; -class A { public : ~A () { dtor_called = 1; } }; -static A a; - -void f() { _exit(dtor_called); } - -int main (int , char **) -{ - atexit (f); - return 0; -} diff --git a/cmake/cxx_tests/dynamic_cast.cpp b/cmake/cxx_tests/dynamic_cast.cpp deleted file mode 100644 index fad198d0..00000000 --- a/cmake/cxx_tests/dynamic_cast.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports dynamic_cast<>. -// In sync with AC_CXX_DYNAMIC_CAST (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include - -class Base { -public: - Base () {} - virtual void f() = 0; -}; - -class Derived: public Base { -public: - Derived() {} - virtual void f() {} -}; - -int main() { - Derived d; - Base& b=d; - return dynamic_cast(&b) ? 0 : 1; -}; diff --git a/cmake/cxx_tests/enum_computations.cpp b/cmake/cxx_tests/enum_computations.cpp deleted file mode 100644 index 94d2d8b6..00000000 --- a/cmake/cxx_tests/enum_computations.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler handle computations inside an enum. -// In sync with AC_CXX_ENUM_COMPUTATIONS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -struct A { enum { a = 5, b = 7, c = 2 }; }; -struct B { enum { a = 1, b = 6, c = 9 }; }; - -template -struct Z { - enum { a = (T1::a>T2::a) ? 
T1::a : T2::b, - b = T1::b+T2::b, - c = (T1::c*T2::c+T2::a+T1::a) }; -}; - -int main() { - return (((int)Z::a==5) && ((int)Z::b==13) && ((int)Z::c==24)) ? 0 : 1; -} diff --git a/cmake/cxx_tests/enum_computations_with_cast.cpp b/cmake/cxx_tests/enum_computations_with_cast.cpp deleted file mode 100644 index adb3f47f..00000000 --- a/cmake/cxx_tests/enum_computations_with_cast.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler handle (int) casts in enum computations. -// In sync with AC_CXX_ENUM_COMPUTATIONS_WITH_CAST (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -struct A { enum { a = 5, b = 7, c = 2 }; }; -struct B { enum { a = 1, b = 6, c = 9 }; }; - -template -struct Z { - enum { a = ((int)T1::a>(int)T2::a) ? (int)T1::a : (int)T2::b, - b = (int)T1::b+(int)T2::b, - c = ((int)T1::c*(int)T2::c+(int)T2::a+(int)T1::a) }; -}; - -int main() { - return (((int)Z::a==5) && ((int)Z::b==13) && ((int)Z::c==24)) ? 0 : 1; -} diff --git a/cmake/cxx_tests/exceptions.cpp b/cmake/cxx_tests/exceptions.cpp deleted file mode 100644 index ba5ddf9f..00000000 --- a/cmake/cxx_tests/exceptions.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports exceptions handling (try, throw and catch), -// In sync with AC_CXX_EXCEPTIONS (2008-04-12). -// -// Copyright (c) 2009 Theo Papadopoulo -// -// ORIGINAL COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -int main() { - try { throw 1; } catch (int i) { return i; } -} diff --git a/cmake/cxx_tests/explicit.cpp b/cmake/cxx_tests/explicit.cpp deleted file mode 100644 index 4c1ceaf8..00000000 --- a/cmake/cxx_tests/explicit.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler can be asked to prevent using implicitly one argument -// constructors as converting constructors with the explicit keyword. -// In sync with AC_CXX_EXPLICIT (2008-04-12) -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -class A{ -public: - explicit A(double){} -}; - -int main() { - double c = 5.0; - A x(c); - return 0; -} diff --git a/cmake/cxx_tests/explicit_instantiations.cpp b/cmake/cxx_tests/explicit_instantiations.cpp deleted file mode 100644 index a79f0526..00000000 --- a/cmake/cxx_tests/explicit_instantiations.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports explicit instanciations syntax, define -// HAVE_INSTANTIATIONS. -// In sync with AC_CXX_EXPLICIT_INSTANTIATIONS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -template class A { T t; }; -template class A; - -int main() { } diff --git a/cmake/cxx_tests/explicit_template_function_qualification.cpp b/cmake/cxx_tests/explicit_template_function_qualification.cpp deleted file mode 100644 index 739dd5d5..00000000 --- a/cmake/cxx_tests/explicit_template_function_qualification.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports explicit template function qualification. -// In sync with AC_CXX_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template class A { public : A() {} }; - -template -A to(const A&) { return A(); } - -int main() { - A x; - A y = to(x); - return 0; -} diff --git a/cmake/cxx_tests/extern_template.cpp b/cmake/cxx_tests/extern_template.cpp deleted file mode 100644 index 731545b0..00000000 --- a/cmake/cxx_tests/extern_template.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// DESCRIPTION -// -// Test whether the C++ compiler supports "extern template". -// In sync with AC_CXX_EXTERN_TEMPLATE (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Patrick Mauritz -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -template void foo(T); extern template void foo(int); - -int main() { } diff --git a/cmake/cxx_tests/full_specialization_syntax.cpp b/cmake/cxx_tests/full_specialization_syntax.cpp deleted file mode 100644 index f595884a..00000000 --- a/cmake/cxx_tests/full_specialization_syntax.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler recognizes the full specialization syntax. -// In sync with AC_CXX_FULL_SPECIALIZATION_SYNTAX (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template class A { public : int f () const { return 1; } }; -template<> class A { public: int f () const { return 0; } }; - -int main() { - A a; - return a.f(); -} diff --git a/cmake/cxx_tests/function_nontype_parameters.cpp b/cmake/cxx_tests/function_nontype_parameters.cpp deleted file mode 100644 index 219a1433..00000000 --- a/cmake/cxx_tests/function_nontype_parameters.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports function templates with non-type parameters. -// In sync with AC_CXX_FUNCTION_NONTYPE_PARAMETERS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -template class A {}; -template int f(const A& x) { return 0; } - -int main() { - A z; - return f(z); -} diff --git a/cmake/cxx_tests/gnucxx_hashmap.cpp b/cmake/cxx_tests/gnucxx_hashmap.cpp deleted file mode 100644 index 0082da0a..00000000 --- a/cmake/cxx_tests/gnucxx_hashmap.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// DESCRIPTION -// -// Test for the presence of GCC's hashmap STL extension. -// In sync with AC_CXX_GNUCXX_HASHMAP (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Patrick Mauritz -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -using __gnu_cxx::hash_map; - -int main() { } diff --git a/cmake/cxx_tests/have_climits.cpp b/cmake/cxx_tests/have_climits.cpp deleted file mode 100644 index a9e1d8e3..00000000 --- a/cmake/cxx_tests/have_climits.cpp +++ /dev/null @@ -1,6 +0,0 @@ -#include - -int main() { - int i = INT_MIN; - return 0; -} diff --git a/cmake/cxx_tests/have_complex.cpp b/cmake/cxx_tests/have_complex.cpp deleted file mode 100644 index 96ed9028..00000000 --- a/cmake/cxx_tests/have_complex.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has complex. -// In sync with AC_CXX_HAVE_COMPLEX (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include - -using namespace std; - -int main() { - complex a; - complex b; - return 0; -} diff --git a/cmake/cxx_tests/have_complex_fcns.cpp b/cmake/cxx_tests/have_complex_fcns.cpp deleted file mode 100644 index b6e9732d..00000000 --- a/cmake/cxx_tests/have_complex_fcns.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include -using namespace std; - -int main() { - complex x(1.0, 1.0); - real(x); imag(x); abs(x); arg(x); norm(x); conj(x); polar(1.0,1.0); - return 0; -} diff --git a/cmake/cxx_tests/have_complex_math1.cpp b/cmake/cxx_tests/have_complex_math1.cpp deleted file mode 100644 index d61f2851..00000000 --- a/cmake/cxx_tests/have_complex_math1.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has the complex math -// functions cos, cosh, exp, log, pow, sin, sinh, sqrt, tan and -// tanh. -// In sync with AC_CXX_HAVE_COMPLEX_MATH1 (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -using namespace std; - -int main() { - complex x(1.0,1.0),y(1.0, 1.0); - cos(x); - cosh(x); - exp(x); - log(x); - pow(x,1); - pow(x,double(2.0)); - pow(x,y); - pow(double(2.0),x); - sin(x); - sinh(x); - sqrt(x); - tan(x); - tanh(x); - return 0; -} diff --git a/cmake/cxx_tests/have_complex_math2.cpp b/cmake/cxx_tests/have_complex_math2.cpp deleted file mode 100644 index 38860f65..00000000 --- a/cmake/cxx_tests/have_complex_math2.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has the complex math functions -// acos, asin, atan, atan2 and log10. -// In sync with AC_CXX_HAVE_COMPLEX_MATH2 (2008-04-12). 
-// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -using namespace std; - -int main() { - complex x(1.0,1.0),y(1.0,1.0); - acos(x); - asin(x); - atan(x); - atan2(x,y); - atan2(x,double(3.0)); - atan2(double(3.0),x); - log10(x); - return 0; -} diff --git a/cmake/cxx_tests/have_empty_iostream.cpp b/cmake/cxx_tests/have_empty_iostream.cpp deleted file mode 100644 index 3bd04dac..00000000 --- a/cmake/cxx_tests/have_empty_iostream.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// DESCRIPTION -// -// Check if the C++ compiler allow the empty iostream constructor. -// Ok before gcc3, not after. -// In sync with AC_CXX_HAVE_EMPTY_IOSTREAM (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Alain BARBET -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . -// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. 
You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include -using namespace std; - -int main() { - iostream iostr; - return 0; -} diff --git a/cmake/cxx_tests/have_ext_hash_map.cpp b/cmake/cxx_tests/have_ext_hash_map.cpp deleted file mode 100644 index 56d51359..00000000 --- a/cmake/cxx_tests/have_ext_hash_map.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// DESCRIPTION -// -// Check if the C++ compiler has ext/hash_map. -// In sync with AC_CXX_HAVE_EXT_HASH_MAP (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Perceval ANICHINI -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . -// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. 
You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include -using namespace std; - -int main() { - hash_map t; - return 0; -} diff --git a/cmake/cxx_tests/have_ext_hash_set.cpp b/cmake/cxx_tests/have_ext_hash_set.cpp deleted file mode 100644 index abdba8b0..00000000 --- a/cmake/cxx_tests/have_ext_hash_set.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// DESCRIPTION -// -// Check if the C++ compiler has ext/hash_set. -// In sync with AC_CXX_HAVE_EXT_HASH_SET (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Alain BARBET -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . -// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. 
You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include -using namespace std; - -int main() { - hash_set t; - return 0; -} diff --git a/cmake/cxx_tests/have_ext_slist.cpp b/cmake/cxx_tests/have_ext_slist.cpp deleted file mode 100644 index 54356406..00000000 --- a/cmake/cxx_tests/have_ext_slist.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// DESCRIPTION -// -// Check if the C++ compiler has ext/slist. -// In sync with AC_CXX_HAVE_EXT_SLIST (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Alain BARBET -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . -// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. 
You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include -using namespace std; - -int main() { - slist s; - return 0; -} diff --git a/cmake/cxx_tests/have_freeze_sstream.cpp b/cmake/cxx_tests/have_freeze_sstream.cpp deleted file mode 100644 index 8d73d149..00000000 --- a/cmake/cxx_tests/have_freeze_sstream.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// DESCRIPTION -// -// Check if the compiler has (need) freeze method call in stringstream/ -// strstream. Seems that Win32 and STLPort have it, libstdc++ not ... -// In sync with AC_CXX_HAVE_FREEZE_SSTREAM (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Alain BARBET -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . 
-// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include -using namespace std; - -int main() { -# ifdef HAVE_SSTREAM - stringstream message; -# else - strstream message; -# endif - message << "Hello"; - message.freeze(0); - return 0; -} diff --git a/cmake/cxx_tests/have_ieee_math.cpp b/cmake/cxx_tests/have_ieee_math.cpp deleted file mode 100644 index b93b088e..00000000 --- a/cmake/cxx_tests/have_ieee_math.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has the double math functions -// acosh, asinh, atanh, cbrt, expm1, erf, erfc, isnan, j0, j1, jn, gamma, -// lgamma, ilogb, logb, log1p, rint, y0, y1, yn, hypot, nextafter, remainder -// and scalb. -// In sync with AC_CXX_HAVE_IEEE_MATH (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#ifndef _ALL_SOURCE - #define _ALL_SOURCE -#endif -#ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE -#endif -#ifndef _XOPEN_SOURCE_EXTENDED - #define _XOPEN_SOURCE_EXTENDED 1 -#endif -#include - -int main() { - double x = 1.0; - double y = 1.0; - int i = 1; - acosh(x); - asinh(x); - atanh(x); - cbrt(x); - expm1(x); - erf(x); - erfc(x); - isnan(x); - j0(x); - j1(x); - jn(i,x); - ilogb(x); - logb(x); - log1p(x); - rint(x); - y0(x); - y1(x); - yn(i,x); -# ifdef _THREAD_SAFE - gamma_r(x,&i); - lgamma_r(x,&i); -# else - gamma(x); - lgamma(x); -# endif - hypot(x,y); - nextafter(x,y); - remainder(x,y); - scalb(x,y); - return 0; -} diff --git a/cmake/cxx_tests/have_long_long_for_iostream.cpp b/cmake/cxx_tests/have_long_long_for_iostream.cpp deleted file mode 100644 index fb92cc35..00000000 --- a/cmake/cxx_tests/have_long_long_for_iostream.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// DESCRIPTION -// -// Check if the C++ compiler allow long long for [i|o]stream. -// Seems that OpenBSD/gcc-3 don't have it. -// In sync with AC_CXX_HAVE_LONG_LONG_FOR_IOSTREAM (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Alain BARBET -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2 of the License, or (at your -// option) any later version. -// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -// Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this program. If not, see . 
-// -// As a special exception, the respective Autoconf Macro's copyright owner -// gives unlimited permission to copy, distribute and modify the configure -// scripts that are the output of Autoconf when processing the Macro. You -// need not follow the terms of the GNU General Public License when using -// or distributing such scripts, even though portions of the text of the -// Macro appear in them. The GNU General Public License (GPL) does govern -// all other use of the material that constitutes the Autoconf Macro. -// -// This special exception to the GPL applies to versions of the Autoconf -// Macro released by the Autoconf Macro Archive. When you make and -// distribute a modified version of the Autoconf Macro, you may extend this -// special exception to the GPL to apply to your modified version as well. - -#include - -#ifdef HAVE_SSTREAM -#include -#else -#include -#endif - -using namespace std; - -int main() { - ostream str((streambuf *)0); - long long l=1; - str << l; - return 0; -} diff --git a/cmake/cxx_tests/have_numeric_limits.cpp b/cmake/cxx_tests/have_numeric_limits.cpp deleted file mode 100644 index b436b50d..00000000 --- a/cmake/cxx_tests/have_numeric_limits.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has numeric_limits. -// In sync with AC_CXX_HAVE_NUMERIC_LIMITS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -using namespace std; - -int main() { - double e = numeric_limits::epsilon(); - return 0; -} diff --git a/cmake/cxx_tests/have_rusage.cpp b/cmake/cxx_tests/have_rusage.cpp deleted file mode 100644 index 576575a6..00000000 --- a/cmake/cxx_tests/have_rusage.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int main() { - struct rusage resUsage; - getrusage(RUSAGE_SELF, &resUsage); - return 0; -} diff --git a/cmake/cxx_tests/have_sstream.cpp b/cmake/cxx_tests/have_sstream.cpp deleted file mode 100644 index 836fe210..00000000 --- a/cmake/cxx_tests/have_sstream.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ library has a working stringstream. -// In sync with AC_CXX_HAVE_SSTREAM (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Ben Stanley -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -//AC_CACHE_CHECK(whether the compiler has stringstream, - -#include -using namespace std; - -int main() { - stringstream message; - message << "Hello"; - return 0; -} diff --git a/cmake/cxx_tests/have_std.cpp b/cmake/cxx_tests/have_std.cpp deleted file mode 100644 index 4d5d8307..00000000 --- a/cmake/cxx_tests/have_std.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports ISO C++ standard library -// (i.e., can include the files iostream, map, iomanip and cmath). -// In sync with AC_CXX_HAVE_STD (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -#include -#include -#include -using namespace std; - -int main() { return 0; } diff --git a/cmake/cxx_tests/have_stl.cpp b/cmake/cxx_tests/have_stl.cpp deleted file mode 100644 index 1100ff70..00000000 --- a/cmake/cxx_tests/have_stl.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports the Standard Template -// Library. -// In sync with AC_CXX_HAVE_STL (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -#include -using namespace std; - -int main() { - list x; - x.push_back(5); - list::iterator iter = x.begin(); - if (iter!=x.end()) ++iter; - return 0; -} diff --git a/cmake/cxx_tests/have_string_push_back.cpp b/cmake/cxx_tests/have_string_push_back.cpp deleted file mode 100644 index 250da837..00000000 --- a/cmake/cxx_tests/have_string_push_back.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the implementation of the C++ library provides the method -// std::string::push_back (char), define HAVE_STRING_PUSH_BACK. -// In sync with AC_CXX_HAVE_STRING_PUSH_BACK (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Jan Langer -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -using namespace std; - -int main() { - string message; - message.push_back('a'); - return 0; -} diff --git a/cmake/cxx_tests/have_system_v_math.cpp b/cmake/cxx_tests/have_system_v_math.cpp deleted file mode 100644 index 79011457..00000000 --- a/cmake/cxx_tests/have_system_v_math.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has the double math functions -// _class, trunc, itrunc, nearest, rsqrt, uitrunc, copysign, drem, finite, -// and unordered. -// In sync with AC_CXX_HAVE_SYSTEM_V_MATH (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#ifndef _ALL_SOURCE - #define _ALL_SOURCE -#endif -#ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE -#endif -#ifndef _XOPEN_SOURCE_EXTENDED - #define _XOPEN_SOURCE_EXTENDED 1 -#endif -#include - -int main() { - double x = 1.0; - double y = 1.0; - _class(x); - trunc(x); - finite(x); - itrunc(x); - nearest(x); - rsqrt(x); - uitrunc(x); - copysign(x,y); - drem(x,y); - unordered(x,y); - return 0; -} diff --git a/cmake/cxx_tests/have_valarray.cpp b/cmake/cxx_tests/have_valarray.cpp deleted file mode 100644 index ed6fa201..00000000 --- a/cmake/cxx_tests/have_valarray.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler has valarray. -// In sync with AC_CXX_HAVE_VALARRAY (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -using namespace std; - -int main() { - valarray x(100); - return 0; -} diff --git a/cmake/cxx_tests/have_vector_at.cpp b/cmake/cxx_tests/have_vector_at.cpp deleted file mode 100644 index 9bf464b4..00000000 --- a/cmake/cxx_tests/have_vector_at.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the implementation of the C++ library -// provides the method std::vector::at(std::size_t). -// In sync with AC_CXX_HAVE_VECTOR_AT (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Jan Langer -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -using namespace std; - -int main() { - vector v(1); - message.at(0); - return 0; -} diff --git a/cmake/cxx_tests/header_pre_stdcxx.cpp b/cmake/cxx_tests/header_pre_stdcxx.cpp deleted file mode 100644 index ae4eaae7..00000000 --- a/cmake/cxx_tests/header_pre_stdcxx.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// DESCRIPTION -// -// Check whether pre-ISO-C++ headers exist. -// In sync with AC_CXX_HEADER_PRE_STDCXX (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main() { } diff --git a/cmake/cxx_tests/header_stdcxx_0x.cpp b/cmake/cxx_tests/header_stdcxx_0x.cpp deleted file mode 100644 index 79ce04a7..00000000 --- a/cmake/cxx_tests/header_stdcxx_0x.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// DESCRIPTION -// -// Check for library coverage of the C++0x standard. -// In sync with AC_CXX_HEADER_STDCXX_0X (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main() { } diff --git a/cmake/cxx_tests/header_stdcxx_98.cpp b/cmake/cxx_tests/header_stdcxx_98.cpp deleted file mode 100644 index 3170983d..00000000 --- a/cmake/cxx_tests/header_stdcxx_98.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// DESCRIPTION -// -// Check for complete library coverage of the C++1998/2003 standard. -// In sync with AC_CXX_HEADER_STDCXX_98 (2008-04-17). 
-// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main() { } diff --git a/cmake/cxx_tests/header_stdcxx_tr1.cpp b/cmake/cxx_tests/header_stdcxx_tr1.cpp deleted file mode 100644 index 2620b933..00000000 --- a/cmake/cxx_tests/header_stdcxx_tr1.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// DESCRIPTION -// -// Check for library coverage of the TR1 standard. -// In sync with AC_CXX_HEADER_STDCXX_TR1 (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int main() { } diff --git a/cmake/cxx_tests/header_tr1_unordered_map.cpp b/cmake/cxx_tests/header_tr1_unordered_map.cpp deleted file mode 100644 index 4a8cd660..00000000 --- a/cmake/cxx_tests/header_tr1_unordered_map.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Check whether the TR1 include exists and define -// HAVE_TR1_UNORDERED_MAP if it does. -// In sync with AC_CXX_HEADER_TR1_UNORDERED_MAP (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include - -inf main() { - using std::tr1::unordered_map; -} diff --git a/cmake/cxx_tests/header_tr1_unordered_set.cpp b/cmake/cxx_tests/header_tr1_unordered_set.cpp deleted file mode 100644 index 4b0e4d4d..00000000 --- a/cmake/cxx_tests/header_tr1_unordered_set.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Check whether the TR1 include exists and define -// HAVE_TR1_UNORDERED_SET if it does. -// In sync with AC_CXX_HEADER_TR1_UNORDERED_SET (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include ; - -inf main() { - using std::tr1::unordered_set; -} diff --git a/cmake/cxx_tests/header_unordered_map.cpp b/cmake/cxx_tests/header_unordered_map.cpp deleted file mode 100644 index 83b3d29e..00000000 --- a/cmake/cxx_tests/header_unordered_map.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Check whether the C++ include exists and define -// HAVE_UNORDERED_MAP if it does. -// In sync with AC_CXX_HEADER_UNORDERED_MAP (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include ; - -int main() { - using std::unordered_map; -} diff --git a/cmake/cxx_tests/header_unordered_set.cpp b/cmake/cxx_tests/header_unordered_set.cpp deleted file mode 100644 index 72ad7e4d..00000000 --- a/cmake/cxx_tests/header_unordered_set.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Check whether the C++ include exists and define -// HAVE_UNORDERED_SET if it does. -// In sync with AC_CXX_HEADER_UNORDERED_SET (2008-04-17). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Benjamin Kosnik -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include ; - -int main() { - using std::unordered_set; -} diff --git a/cmake/cxx_tests/isnan_in_namespace_std.cpp b/cmake/cxx_tests/isnan_in_namespace_std.cpp deleted file mode 100644 index 11018d28..00000000 --- a/cmake/cxx_tests/isnan_in_namespace_std.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the isnan function is provided by the cmath header file in std:: -// namespace, define HAVE_ISNAN_IN_NAMESPACE_STD. 
-// -// COPYLEFT -// -// Copyright (c) 2011 Theo Papadopoulo -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include - -namespace S { - bool isnan(const float x) { return std::isnan(x); } -} - -int main() { - using namespace S; - float x = 1.0; - S::isnan(x); - return 0; -} diff --git a/cmake/cxx_tests/isnormal_in_namespace_std.cpp b/cmake/cxx_tests/isnormal_in_namespace_std.cpp deleted file mode 100644 index 7ff30d12..00000000 --- a/cmake/cxx_tests/isnormal_in_namespace_std.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the isnormal function is provided by the cmath header file in std:: -// namespace, define HAVE_ISNORMAL_IN_NAMESPACE_STD. -// -// COPYLEFT -// -// Copyright (c) 2011 Theo Papadopoulo -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include - -namespace S { - bool isnormal(const double x) { return std::isnormal(x); } -} - -int main() { - double d = 3.0; - const bool res = S::isnormal(d); - return 0; -} diff --git a/cmake/cxx_tests/ldflags_std_lang.cpp b/cmake/cxx_tests/ldflags_std_lang.cpp deleted file mode 100644 index 6e619828..00000000 --- a/cmake/cxx_tests/ldflags_std_lang.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Append to LD-FLAGS the set of link-time flags that should be passed to -// the C++ compiler in order to enable use of C++ features as defined in -// the ANSI C++ standard (eg. use of standard iostream classes in the `std' -// namespace, etc.). Note that if you use GNU Libtool you may need to -// prefix each of those switches with `-Xlinker' so that Libtool doesn't -// discard them (see Libtool's manual and `AC_LIBTOOLIZE_LDFLAGS'). -// In sync with AC_CXX_LDFLAGS_STD_LANG(LD-FLAGS) (2008-04-12). 
-// -// COPYLEFT -// -// Copyright (c) 2008 Ludovic Courtès -// Copyright (c) 2009 Theo Papadopoulo -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -AC_DEFUN([AC_CXX_LDFLAGS_STD_LANG], - [AC_REQUIRE([AC_CXX_COMPILER_VENDOR]) - case "$ac_cv_cxx_compiler_vendor" in - sgi) $1="$$1 -LANG:std -exceptions";; - hp) $1="$$1 -AA";; - esac]) diff --git a/cmake/cxx_tests/math_absint_in_namespace_std.cpp b/cmake/cxx_tests/math_absint_in_namespace_std.cpp deleted file mode 100644 index ae406766..00000000 --- a/cmake/cxx_tests/math_absint_in_namespace_std.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() { - int i = std::abs(1); - long j = std::labs(1L); - long k = std::abs(1L); - return 0; -} diff --git a/cmake/cxx_tests/math_fn_in_namespace_std.cpp b/cmake/cxx_tests/math_fn_in_namespace_std.cpp deleted file mode 100644 index c806f742..00000000 --- a/cmake/cxx_tests/math_fn_in_namespace_std.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include - -namespace blitz { - double pow(double x, double y) { return std::pow(x,y); } -} - -int main() { - using namespace blitz; - double x = 1.0, y = 1.0; - blitz::pow(x,y); - return 0; -} diff --git a/cmake/cxx_tests/member_constants.cpp b/cmake/cxx_tests/member_constants.cpp deleted file mode 100644 index e0c42aa4..00000000 --- a/cmake/cxx_tests/member_constants.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports member constants, define HAVE_MEMBER_CONSTANTS. -// In sync with AC_CXX_MEMBER_CONSTANTS (2008-04-12) -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -class C {public: static const int i = 0;}; -const int C::i; - -int main() { - return C::i; -} diff --git a/cmake/cxx_tests/member_templates.cpp b/cmake/cxx_tests/member_templates.cpp deleted file mode 100644 index 2d515065..00000000 --- a/cmake/cxx_tests/member_templates.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports member templates. -// In sync with AC_CXX_MEMBER_TEMPLATES (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template class A { -public: - template A operator=(const A& z) { return A(); } -}; - -int main() { - A x; - A y; - x = y; - return 0; -} diff --git a/cmake/cxx_tests/member_templates_outside_class.cpp b/cmake/cxx_tests/member_templates_outside_class.cpp deleted file mode 100644 index bb3fce7d..00000000 --- a/cmake/cxx_tests/member_templates_outside_class.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports member templates outside the class declaration. -// In sync with AC_CXX_MEMBER_TEMPLATES_OUTSIDE_CLASS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -template class A { -public : - template A operator=(const A& z); -}; - -template -template -A A::operator=(const A& z) { return A(); } - -int main() { - A x; - A y; - x = y; - return 0; -} diff --git a/cmake/cxx_tests/mutable.cpp b/cmake/cxx_tests/mutable.cpp deleted file mode 100644 index 061557a7..00000000 --- a/cmake/cxx_tests/mutable.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler allows modifying class data members flagged with the -// mutable keyword even in const objects (for example in the body of a const member function). -// In sync with AC_CXX_MUTABLE (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -class A { - mutable int i; -public: - int f (int n) const { i = n; return i; } -}; - -int main() { - A a; - return a.f(1); -} diff --git a/cmake/cxx_tests/namespace_std.cpp b/cmake/cxx_tests/namespace_std.cpp deleted file mode 100644 index 1e5ea785..00000000 --- a/cmake/cxx_tests/namespace_std.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports namespace std, define HAVE_NAMESPACE_STD. -// In sync with AC_CXX_NAMESPACE_STD (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include -std::istream& is = std::cin; - -int main() { } diff --git a/cmake/cxx_tests/nceg_restrict.cpp b/cmake/cxx_tests/nceg_restrict.cpp deleted file mode 100644 index 18f1f226..00000000 --- a/cmake/cxx_tests/nceg_restrict.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports the Numerical C Extensions Group restrict -// keyword. -// -// In sync with AC_CXX_NCEG_RESTRICT (2008-04-12). -// !! Replaced by AC_C_RESTRICT in Autoconf 2.58. -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -void add(int length,double* restrict a,const double* restrict b,const double* restrict c) { - for (int i=0;i -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -int main() { - int z = 0; - for (int i = 0; i < 10; ++i) - z = z + i; - for (int i = 0; i < 10; ++i) - z = z - i; - return z; -} diff --git a/cmake/cxx_tests/old_for_scoping.cpp b/cmake/cxx_tests/old_for_scoping.cpp deleted file mode 100644 index ff66592a..00000000 --- a/cmake/cxx_tests/old_for_scoping.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler accepts the old for scoping rules (the scope of a -// variable declared inside the parentheses extends outside the for-body). -// Note that some compilers (notably g++ and egcs) support both new and old rules since they -// accept the old rules and only generate a warning. -// In sync with AC_CXX_OLD_FOR_SCOPING (2008-04-12). 
-// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -int main() { - int z; - for (int i=0; i < 10; ++i) - z=z+i; - z=i; - return z; -} diff --git a/cmake/cxx_tests/partial_ordering.cpp b/cmake/cxx_tests/partial_ordering.cpp deleted file mode 100644 index 79add164..00000000 --- a/cmake/cxx_tests/partial_ordering.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports partial ordering. -// In sync with AC_CXX_PARTIAL_ORDERING (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template struct I {}; -template struct A { - int r; - template int operator()(T1,T2) { r = 0; return r; } - template int operator()(I, I) { r = 1; return r; } -}; - -int main() { - A x,y; - I<0> a; - I<1> b; - return x(a,b)+y(float(),double()); -} diff --git a/cmake/cxx_tests/partial_specialization.cpp b/cmake/cxx_tests/partial_specialization.cpp deleted file mode 100644 index 881a8976..00000000 --- a/cmake/cxx_tests/partial_specialization.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports partial specialization. -// In sync with AC_CXX_PARTIAL_SPECIALIZATION (2008-04-12). 
-// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template class A { public : enum e { z = 0 }; }; -template class A { public : enum e { z = 1 }; }; -template class A { public : enum e { z = 2 }; }; - -int main() { - return (A::z == 0) && (A::z == 1) && (A::z == 2); -} diff --git a/cmake/cxx_tests/reinterpret_cast.cpp b/cmake/cxx_tests/reinterpret_cast.cpp deleted file mode 100644 index ae6f0678..00000000 --- a/cmake/cxx_tests/reinterpret_cast.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports reinterpret_cast<>. -// In sync with AC_CXX_REINTERPRET_CAST (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include - -class Base { -public: - Base () {} - virtual void f() = 0; -}; - -class Derived: public Base { -public: - Derived () {} - virtual void f() {} -}; - -class Unrelated { -public: - Unrelated () {} -}; - -int g(Unrelated&) { return 0; } - -int main() { - Derived d; - Base& b=d; - Unrelated& e=reinterpret_cast(b); - return g(e); -} diff --git a/cmake/cxx_tests/restrict_egcs.cpp b/cmake/cxx_tests/restrict_egcs.cpp deleted file mode 100644 index 7a572e01..00000000 --- a/cmake/cxx_tests/restrict_egcs.cpp +++ /dev/null @@ -1,15 +0,0 @@ -void add(int length,double* __restrict__ a,const double* __restrict__ b,const double* __restrict__ c) { - for (int i=0;i -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include - -class Base { -public: - Base () {} - virtual int f () { return 0; } -}; - -class Derived: public Base { -public : - Derived () {} - virtual int f () { return 1; } -}; - -int main() { - Derived d; - Base *ptr = &d; - return typeid (*ptr) == typeid (Derived); -} diff --git a/cmake/cxx_tests/static_cast.cpp b/cmake/cxx_tests/static_cast.cpp deleted file mode 100644 index 809a48be..00000000 --- a/cmake/cxx_tests/static_cast.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports static_cast<>. -// In sync with AC_CXX_STATIC_CAST (2008-04-12) -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#include - -class Base { -public: - Base () {} - virtual void f() = 0; -}; - -class Derived: public Base { -public: - Derived () {} - virtual void f() {} -}; - -int g (Derived&) { return 0; } - -int main() { - Derived d; - Base& b = d; - Derived& s = static_cast(b); - return g(s); -} diff --git a/cmake/cxx_tests/stlport_hashmap.cpp b/cmake/cxx_tests/stlport_hashmap.cpp deleted file mode 100644 index 68955a8e..00000000 --- a/cmake/cxx_tests/stlport_hashmap.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// DESCRIPTION -// -// Test for the presence of STLport's hashmap extension. -// In sync with AC_CXX_STLPORT_HASHMAP (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Patrick Mauritz -// Copyright (c) 2009 Theo Papadopoulo -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include -using std::hash_map; - -int main() { } diff --git a/cmake/cxx_tests/template_keyword_qualifier.cpp b/cmake/cxx_tests/template_keyword_qualifier.cpp deleted file mode 100644 index 34fd1f17..00000000 --- a/cmake/cxx_tests/template_keyword_qualifier.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports use of the template keyword as a qualifier. -// In sync with AC_CXX_TEMPLATE_KEYWORD_QUALIFIER (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Bernardo Innocenti -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -class X { -public: - template void member() {} - template static void static_member() {} -}; - -template void f(T* p) { - p->template member<200>(); // OK: < starts template argument - T::template static_member<100>(); // OK: < starts explicit qualification -} - -int main() { - X x; - f(&x); - return 0; -} diff --git a/cmake/cxx_tests/template_qualified_base_class.cpp b/cmake/cxx_tests/template_qualified_base_class.cpp deleted file mode 100644 index 76d5e1c1..00000000 --- a/cmake/cxx_tests/template_qualified_base_class.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports template-qualified base class specifiers. -// In sync with AC_CXX_TEMPLATE_QUALIFIED_BASE_CLASS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#ifndef HAVE_TYPENAME - #define typename -#endif - -class Base1 { public : int f () const { return 1; } }; -class Base2 { public : int f () const { return 0; } }; - -template struct base_trait { typedef Base1 base; }; - -#ifdef HAVE_FULL_SPECIALIZATION_SYNTAX -template<> struct base_trait { typedef Base2 base; }; -#else - struct base_trait { typedef Base2 base; }; -#endif - -template -class Weird: public base_trait::base { -public : - typedef typename base_trait::base base; - int g () const { return base::f (); } -}; - -int main() { - Weird z; - return z.g(); -} diff --git a/cmake/cxx_tests/template_qualified_return_type.cpp b/cmake/cxx_tests/template_qualified_return_type.cpp deleted file mode 100644 index 817f5099..00000000 --- a/cmake/cxx_tests/template_qualified_return_type.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports template-qualified return types. 
-// In sync with AC_CXX_TEMPLATE_QUALIFIED_RETURN_TYPE (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#ifndef HAVE_TYPENAME - #define typename -#endif - -template struct promote_trait { typedef X T; }; -template<> struct promote_trait { typedef float T; }; - -template class A { public : A () {} }; - -template -A::T> operator+ (const A&, const A&) { return A::T>(); } - -int main() { - A x; - A y; - A z = x + y; - return 0; -} diff --git a/cmake/cxx_tests/template_scoped_argument_matching.cpp b/cmake/cxx_tests/template_scoped_argument_matching.cpp deleted file mode 100644 index a10685d7..00000000 --- a/cmake/cxx_tests/template_scoped_argument_matching.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports function matching with argument types which are -// template scope-qualified. -// In sync with AC_CXX_TEMPLATE_SCOPED_ARGUMENT_MATCHING (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -#ifndef HAVE_TYPENAME - #define typename -#endif - -template class A { public : typedef X W; }; -template class B {}; -template void operator+(B d1,typename Y::W d2) {} - -int main() { - B > z; - z+0.5f; - return 0; -} diff --git a/cmake/cxx_tests/templates.cpp b/cmake/cxx_tests/templates.cpp deleted file mode 100644 index f1b841eb..00000000 --- a/cmake/cxx_tests/templates.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports basic templates. -// In sync with AC_CXX_TEMPLATES (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template class A { -public: - A() { } -}; - -template void f(const A&) { } - -int main() { - A d; - A i; - f(d); - f(i); - return 0; -} diff --git a/cmake/cxx_tests/templates_as_template_arguments.cpp b/cmake/cxx_tests/templates_as_template_arguments.cpp deleted file mode 100644 index 8c7e1d61..00000000 --- a/cmake/cxx_tests/templates_as_template_arguments.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler supports templates as template arguments. -// In sync with AC_CXX_TEMPLATES_AS_TEMPLATE_ARGUMENTS (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. 
- -template class allocator { public : allocator() {}; }; - -template class T_alloc> -class A { -public: - A() {} -private: - T_alloc alloc_; -}; - -int main() { - A x; - return 0; -} diff --git a/cmake/cxx_tests/type_promotion.cpp b/cmake/cxx_tests/type_promotion.cpp deleted file mode 100644 index 9848bf48..00000000 --- a/cmake/cxx_tests/type_promotion.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef HAVE_TYPENAME - #define typename -#endif - -template struct vec3 { T data_[3]; }; -template struct promote_trait { typedef T1 T_promote; }; -template <> struct promote_trait { typedef double T_promote; }; - -template vec3::T_promote> -operator+(const vec3& a, const vec3& b) { - vec3::T_promote> c; - c.data_[0] = a.data_[0] + b.data_[0]; - c.data_[1] = a.data_[1] + b.data_[1]; - c.data_[2] = a.data_[2] + b.data_[2]; - return c; -} - -int main() { - vec3 a,b; - vec3 c,d,e; - b=a+a; - d=c+c; - e=b+d; - return 0; -} diff --git a/cmake/cxx_tests/typename.cpp b/cmake/cxx_tests/typename.cpp deleted file mode 100644 index 71623cee..00000000 --- a/cmake/cxx_tests/typename.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// DESCRIPTION -// -// whether the compiler recognizes typename the C++ compiler recognizes the typename keyword. -// In sync with AC_CXX_TYPENAME (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -template -class X { -public: - X(){} -}; - -int main() { - X z; - return 0; -} diff --git a/cmake/cxx_tests/use_numtrait.cpp b/cmake/cxx_tests/use_numtrait.cpp deleted file mode 100644 index f36367a8..00000000 --- a/cmake/cxx_tests/use_numtrait.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the C++ compiler supports numeric traits promotions. 
-// In sync with AC_CXX_USE_NUMTRAIT (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2008 Todd Veldhuizen -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Luc Maisonobe -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#ifndef HAVE_TYPENAME - #define typename -#endif - -template class SumType { public: typedef T_numtype T_sumtype; }; -template<> class SumType { public: typedef int T_sumtype; }; - -template class A {}; -template A::T_sumtype> sum(A) { - return A::T_sumtype>(); -} - -int main() { - A x; - sum(x); - return 0; -} diff --git a/cmake/cxx_tests/verbose_terminate_handler.cpp b/cmake/cxx_tests/verbose_terminate_handler.cpp deleted file mode 100644 index b3a3a8f1..00000000 --- a/cmake/cxx_tests/verbose_terminate_handler.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// DESCRIPTION -// -// Test to check whether the compiler does have the verbose terminate handler, define -// HAVE_VERBOSE_TERMINATE_HANDLER. -// In sync with AC_CXX_VERBOSE_TERMINATE_HANDLER (2008-04-12). -// -// COPYLEFT -// -// Copyright (c) 2009 Theo Papadopoulo -// Copyright (c) 2008 Lapo Luchini -// -// Copying and distribution of this file, with or without modification, are -// permitted in any medium without royalty provided the copyright notice -// and this notice are preserved. - -#include ; - -int main() { - std::set_terminate(__gnu_cxx::__verbose_terminate_handler); -} diff --git a/compiler/LEGAL b/compiler/LEGAL deleted file mode 100644 index eae295cf..00000000 --- a/compiler/LEGAL +++ /dev/null @@ -1,8 +0,0 @@ -This compiler test suite is (C) 1997 Todd Veldhuizen. Permission is -granted to use this test suite for non-commercial purposes only. This -suite may be redistributed so long as no fee is charged, and all the -files in the original distribution are included intact. - -If you wish to use this suite for a commercial project (i.e. 
testing -ISO/ANSI C++ standard compliance for a compiler), please contact me -for licensing information at . diff --git a/compiler/README b/compiler/README deleted file mode 100644 index a276f1dc..00000000 --- a/compiler/README +++ /dev/null @@ -1,22 +0,0 @@ - Blitz++ Compiler Feature Tests - - http://monet.uwaterloo.ca/blitz/compilers/ - -August 1997 - -This tar file (bzconfig.tar.gz) contains a set of small programs to -test your compiler's support of new C++ language features. Some of -these features are necessary for the Blitz++ library; others aren't. - -Run the script bzconfig by invoking bash (or sh) on it: -bash ./bzconfig - -After asking a few questions about invoking your compiler, the script -will compile and run a series of small C++ programs. The results of -the tests are written to a file config.h. - -* If you have problems getting the script to work, try setting - "verbose=1" or "set -x" at the beginning of the script file. - -Thanks, -Todd diff --git a/compiler/bool.cpp b/compiler/bool.cpp deleted file mode 100644 index 81274155..00000000 --- a/compiler/bool.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// bool treated as distinct type -// BZ_BOOL - -int foo(int x) -{ - return 1; -} - -int foo(char x) -{ - return 1; -} - -int foo(bool x) -{ - return 0; -} - -int main() -{ - bool c = true; - return foo(c); -} - diff --git a/compiler/bzconfig b/compiler/bzconfig deleted file mode 100755 index cecb1022..00000000 --- a/compiler/bzconfig +++ /dev/null @@ -1,377 +0,0 @@ -#! /bin/sh -# -# Evaluate C++ compiler implementation, to determine which kludges the -# Blitz++ library should use. -# -# With thanks to Tom Keffer (Rogue Wave Software) and Larry Wall -# -# $Id$ - -# If you need to debug this script, try uncommenting the -# next line -# set -x - -# Alternately, try setting verbose=1 on the next line -verbose=0 - -# If your platform generates .OBJ instead of .o files, you'll need -# to edit the next line -objextension='o' - -if test ! 
-t 0; then - echo "Use 'sh bzconfig', not 'sh < bzconfig'" - exit 1 -fi - -# clean="rm -f a.out core bztemp bztest bzjunk.cpp bzjunk.o bzjunk.ii bzjunk.int.c bzjunk.s" -clean="rm -f core" -trap '$clean; exit 1' 1 2 3 15 - -# Information about the system: -link='ln -s' - -# Information about invoking the compiler -cppinvoke='' -extension='cpp' -special='' -srcdir='.' -install=0 -interactive=1 - -# Don't bother using symbolic links, just copy - -# Does the system support symbolic links? -# echo "foo" >test.1 -# ln -s test.1 test.2 >>bztemp 2>&1 -# if grep foo test.2 >>bztemp 2>&1; then -# copy='ln -s' -# else -# copy='cp' -# fi -# rm -f test.1 test.2 -copy='cp' - -# Determine if we're running in interactive mode, or if the -# arguments were passed on the command line - -ac_prev= -for ac_option -do - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - case "$ac_option" in - --compiler=*) - cppinvoke="$ac_optarg" - interactive=0 ;; - --flags=*) - special="$ac_optarg" ;; - --srcdir=*) - srcdir="$ac_optarg" ;; - --extension=*) - extension="$ac_optarg" ;; - --install) - install=1 ;; - --help) - cat << EOF -Usage: bzconfig [options] -Options: - --compiler=PROGRAM Compiler invokation (cc, KCC, g++, vacbld, etc.) - You will be prompted if this option is missing. - --flags=FLAGS Flags for the compiler. Multiple flags can be - indicated using quotes, e.g. --flags="-x -g" - --extension=EXT Extension for C++ programs (cpp, C, cxx) - Defaults to cpp - --install Automatically install the resulting config.h - file to ../blitz/config.h (default is not to) -EOF - exit 0;; - -*) { echo "bzconfig: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - esac -done - -cat << 'EOH' - -Blitz++ compiler evaluation - -This script will test your compiler to determine which language -features it supports. 
- -EOH - -if test $interactive -eq 1; then - -echo "Running in interactive mode (the --compiler option was not specified)" - -echo "What is the command to invoke your C++ compiler? " -read cppinvoke - -echo " " -echo "Suggested flags:" -echo "KCC: -x --restrict" -echo "KCC under Linux: -x --restrict -D__signed__=" -echo "SGi: -n32 -experimental" -echo " " -echo "* If your compiler does not recognize the new ISO C++ keyword" -echo " \"typename,\" you should compile with -DBZ_NO_TYPENAME" -echo "* If your compiler does not implement namespaces, you should" -echo " compile with -DBZ_NO_NAMESPACES" -echo "* If your compiler needs special flags for exceptions and RTTI, don't " -echo " bother -- Blitz++ doesn't use these features, although this suite does " -echo " test for them." -echo " " -echo "Any special compile flags? (ENTER for none) " -read special - -echo " " -echo "I am assuming your compiler recognizes .cpp extensions. If not," -echo "start bzconfig again and use the --extension=EXT option." -fi - - -case "$cppinvoke" in - vacbld) - vacbld=1 - echo Using special setup for vacbld. - ;; - *) - vacbld=0 - ;; -esac - -echo " " -echo "I am now going to try a simple program." -cat <<'EOP' >bzjunk.$extension -int main() { return 0; } -EOP - -if test $vacbld -eq 1; then - echo Checking vacbld... - rm -f bzjunk - if vacbld $special $srcdir/vac.icc >>bztemp 2>&1 && - test -x bzjunk - then - echo vacbld ran successfully. - else - echo "Hmmm.. I was unable to compile a simple program." - echo "The command used was:" - echo "vacbld $special $srcdir/vac.icc" - $clean - exit 1 - fi -else -if test $verbose -eq 1; then - echo $cppinvoke $special -c bzjunk.$extension - echo test -f bzjunk.$objextension -fi - -if $cppinvoke $special -c bzjunk.$extension && # >>bztemp 2>&1 && - test -f bzjunk.$objextension -then - echo "Okay, it compiled. But will it link?" -else - echo "Hmmm.. I was unable to compile a simple program." 
- echo "The command line I used was:" - echo "$cppinvoke $special -c bzjunk.$extension" - echo "If your platform uses .OBJ instead of .o files, you'll need" - echo "to edit the bzconfig script and set objextension correctly." - $clean - exit 1 -fi - -if test $verbose -eq 1; then - echo $cppinvoke $special bzjunk.$objextension -o bzjunk - echo test -x bzjunk - echo sh -c ./bzjunk -fi - -if $cppinvoke $special bzjunk.$objextension -o bzjunk -lm >>bztemp 2>&1 && - sh -c ./bzjunk >>bztemp 2>&1 -then - echo "Yes, it linked too. Great." - echo " " - rm -f bzjunk.$extension bzjunk.o bzjunk -else - echo "No, I could compile, but couldn't link (or couldn't execute" - echo "the resulting file." - echo "The command line I used was:" - echo "$cppinvoke $special bzjunk.$objextension -o bzjunk -lm" - $clean - exit 1 -fi -fi - -######################################################################## - -echo " " -echo "Okay, now the fun begins." -echo " " - -rm -f config.h logfile -cat << 'EOH' >config.h -/****************************************************************************** - * config.h Compiler language support flags - * - * This file was generated automatically by the script bzconfig. - * You should rerun bzconfig each time you switch compilers, install new - * standard libraries, or change compiler versions. 
- * - */ - -EOH - -echo " " >>config.h -echo "#ifndef BZ_CONFIG_H" >>config.h -echo "#define BZ_CONFIG_H" >>config.h -echo " " >>config.h -echo "#define BZ_COMPILER_NAME \"$cppinvoke\"" >>config.h -echo "#define BZ_COMPILER_OPTIONS \"$special\"" >>config.h -echo "#define BZ_OS_NAME \"`uname -s -r`\"" >>config.h -echo "#define BZ_BZCONFIG_DATE \"`date`\"" >>config.h -echo "#define BZ_PLATFORM \"`$srcdir/../config.guess`\"" >>config.h -echo " " >>config.h - -# Set up a little script to make this easier -echo \#\!/bin/sh > bztest -echo cppinvoke=\"$cppinvoke\" >> bztest -echo special=\"$special\" >> bztest -echo copy=\"$copy\" >> bztest -echo extension=\"$extension\" >> bztest -echo verbose=\"$verbose\" >> bztest -echo srcdir=\"$srcdir\" >> bztest -cat << 'EOSC' >>bztest -# set -x -echo " " -echo $3 -echo " " >>logfile -echo " " >>logfile -echo " " >>logfile -echo $3 >>logfile -echo $cppinvoke $special $srcdir/$2 >>logfile - -rm -f bzjunk bzjunk.$extension bzjunk.o -if test $verbose -eq 1; then echo $copy $srcdir/$2 bzjunk.$extension; fi - -$copy $srcdir/$2 bzjunk.$extension - -if test $verbose -eq 1; then - echo $cppinvoke $special bzjunk.$extension -o bzjunk -fi - -case $cppinvoke in - vacbld) - build="vacbld $special vac.icc" - ;; - *) - build="$cppinvoke $special bzjunk.$extension -o bzjunk -lm" - ;; -esac - -if $build >>logfile 2>&1 && - test -x bzjunk && - sh -c ./bzjunk >>bztemp 2>&1 -then - echo "Yes." - echo "#define $1" >>config.h - echo "Success: $1" >>logfile -else - echo "Nope." - echo "#undef $1" >>config.h - echo "Failed: $1" >>logfile -fi -EOSC -chmod +x bztest - -# Major language features -./bztest BZ_NAMESPACES namespac.cpp "Does your compiler implement namespaces?" -./bztest BZ_EXCEPTIONS except.cpp "What about exceptions?" -./bztest BZ_RTTI rtti.cpp "Run-Time Type Identification?" -./bztest BZ_MEMBER_CONSTANTS membcnst.cpp "Member constants?" -./bztest BZ_OLD_FOR_SCOPING oldfor.cpp "Does your compiler cling to the old 'for' scoping rules?" 
- -# New keywords -echo " " -echo "Now for some of the new keywords." -./bztest BZ_EXPLICIT explicit.cpp "How about the 'explicit' keyword?" -./bztest BZ_MUTABLE mutable.cpp "What about the 'mutable' keyword?" -./bztest BZ_TYPENAME typename.cpp "Does your compiler recognize 'typename'?" -./bztest BZ_NCEG_RESTRICT restrict.cpp "Just on the off chance... the NCEG 'restrict' keyword?" -./bztest BZ_NCEG_RESTRICT_EGCS restric2.cpp "Maybe it recognizes __restrict__?" -./bztest BZ_BOOL bool.cpp "Does it recognize bool as a built-in type?" - -# Typecasting -echo " " -echo "Does your compiler understand the newfangled casting syntax?" -./bztest BZ_CONST_CAST constcst.cpp "What about const_cast<>?" -./bztest BZ_STATIC_CAST statcast.cpp "static_cast<>?" -./bztest BZ_REINTERPRET_CAST reinterp.cpp "reinterpret_cast<>?" -./bztest BZ_DYNAMIC_CAST dynamic.cpp "dynamic_cast<>?" - -# Templates (most important) -echo " " -echo "Okay, now the important stuff -- templates." - -./bztest BZ_TEMPLATES template.cpp "Will it handle basic templates? (If not, just give up now.)" -./bztest BZ_PARTIAL_SPECIALIZATION partial.cpp "Partial specialization?" -./bztest BZ_PARTIAL_ORDERING porder.cpp "Partial ordering?" -./bztest BZ_DEFAULT_TEMPLATE_PARAMETERS default.cpp "Default template parameters?" -./bztest BZ_MEMBER_TEMPLATES membtmpl.cpp "Member templates?" -./bztest BZ_MEMBER_TEMPLATES_OUTSIDE_CLASS membtmp2.cpp "Member templates outside the class declaration?" -./bztest BZ_FULL_SPECIALIZATION_SYNTAX fullspec.cpp "Does it recognize the full specialization syntax?" -./bztest BZ_FUNCTION_NONTYPE_PARAMETERS nontype.cpp "Function templates with non-type parameters?" -./bztest BZ_TEMPLATE_QUALIFIED_BASE_CLASS elabbase.cpp "Template-qualified base class specifiers?" -./bztest BZ_TEMPLATE_QUALIFIED_RETURN_TYPE elabret.cpp "Template-qualified return types (necessary for vector type promotion)?" 
-./bztest BZ_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION tempqual.cpp "Explicit template function qualification?" -./bztest BZ_TEMPLATES_AS_TEMPLATE_ARGUMENTS temptemp.cpp "Templates as template arguments?" -./bztest BZ_TEMPLATE_KEYWORD_QUALIFIER tempkey.cpp "Use of the template keyword as a qualifier?" -./bztest BZ_TEMPLATE_SCOPED_ARGUMENT_MATCHING tempqmt.cpp "Function matching with argument types which are template scope-qualified?" -./bztest BZ_TYPE_PROMOTION promote.cpp "Will it support the vector type promotion mechanism?" -./bztest BZ_USE_NUMTRAIT numtrait.cpp "Numeric traits promotions (sum type, etc.)?" -./bztest BZ_ENUM_COMPUTATIONS enumcomp.cpp "Can your compiler handle computations inside an enum?" -./bztest BZ_ENUM_COMPUTATIONS_WITH_CAST enumcmp2.cpp "Does it handle (int) casts in enum computations?" - -# Standard library -echo " " -echo "Which library features does your compiler provide?" -./bztest BZ_HAVE_COMPLEX complex.cpp "Does it have complex?" -./bztest BZ_HAVE_NUMERIC_LIMITS numlimit.cpp "Does it have numeric_limits?" -./bztest BZ_HAVE_CLIMITS climits.cpp "Does it have ?" -./bztest BZ_HAVE_VALARRAY valarray.cpp "Does it have valarray?" -./bztest BZ_HAVE_COMPLEX_MATH compmath.cpp "Complex math functions?" -./bztest BZ_HAVE_IEEE_MATH ieeemath.cpp "IEEE Math library?" -./bztest BZ_HAVE_SYSTEM_V_MATH sysvmath.cpp "System V Math library?" -./bztest BZ_MATH_FN_IN_NAMESPACE_STD mathscop.cpp "Are C math functions in and std::?" -./bztest BZ_COMPLEX_MATH_IN_NAMESPACE_STD cmthscop.cpp "Are complex math functions in std::?" -./bztest BZ_HAVE_STD std.cpp "ISO C++ Standard library?" -./bztest BZ_HAVE_STL stl.cpp "Standard template library?" -./bztest BZ_HAVE_RUSAGE getruse.cpp "What about getrusage()?" -# Clean up -$clean - -echo " " >>config.h -echo "#endif // BZ_CONFIG_H" >>config.h - -echo " " -echo The results have been written to the file config.h. 
- -if test $interactive -eq 1; then - -echo You should now copy this file to the location of the Blitz++ header -echo files, overwriting the current version of "". -echo " " -echo If you have installed this library in the usual fashion, the command -echo you should run is: -echo " " -echo cp config.h ../blitz -echo " " - -fi - -echo If you\'re curious about which tests passed and failed and why, see -echo this file: -ls -l logfile - - diff --git a/compiler/climits.cpp b/compiler/climits.cpp deleted file mode 100644 index e96d28a2..00000000 --- a/compiler/climits.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// has the header? - -#include - -int main() -{ - int i = INT_MIN; - return 0; -} - diff --git a/compiler/cmthscop.cpp b/compiler/cmthscop.cpp deleted file mode 100644 index 63c7626f..00000000 --- a/compiler/cmthscop.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Where are those pesky math functions? -// BZ_COMPLEX_MATH_IN_NAMESPACE_STD - -#include - -namespace blitz { - using namespace std; - - complex pow(complex x, complex y) - { return std::pow(x,y); } -}; - -int main() -{ - using namespace blitz; - complex x = 1.0, y = 1.0; - blitz::pow(x,y); - return 0; -} - diff --git a/compiler/complex.cpp b/compiler/complex.cpp deleted file mode 100644 index 7f050f56..00000000 --- a/compiler/complex.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// complex class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - complex a; - complex b; - return 0; -} - diff --git a/compiler/compmath.cpp b/compiler/compmath.cpp deleted file mode 100644 index 678e5652..00000000 --- a/compiler/compmath.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// BZ_HAVE_COMPLEX_MATH1 -// Complex math functions, as per 26.2.7 of the Jan'96 draft standard -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - complex x(1.0, 1.0), y(1.0, 1.0); - - real(x); - imag(x); - abs(x); - arg(x); - norm(x); - conj(x); - polar(1.0,1.0); - - cos(x); - cosh(x); - exp(x); - log(x); - 
log10(x); - pow(x,1); - pow(x,double(2.0)); - pow(x, y); - pow(double(2.0), x); - sin(x); - sinh(x); - sqrt(x); - tan(x); - tanh(x); - - return 0; -} - diff --git a/compiler/constcst.cpp b/compiler/constcst.cpp deleted file mode 100644 index 737c4cef..00000000 --- a/compiler/constcst.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// const_cast - - -int main() -{ - int x = 0; - const int& y = x; - - int& z = const_cast(y); - z = 3; - if (x == 3) - return 0; - - return 1; -} - diff --git a/compiler/cstd.cpp b/compiler/cstd.cpp deleted file mode 100644 index 1b12e2ab..00000000 --- a/compiler/cstd.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include - -int main() -{ - return 0; -} - diff --git a/compiler/default.cpp b/compiler/default.cpp deleted file mode 100644 index 6e9acb1f..00000000 --- a/compiler/default.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Default template parameters -// BZ_DEFAULT_TEMPLATE_PARAMETERS - -template -class foo { -public: - int bar() const - { return 0; } -}; - -int main() -{ - foo z; - return z.bar(); -} - diff --git a/compiler/dynamic.cpp b/compiler/dynamic.cpp deleted file mode 100644 index c8eb1488..00000000 --- a/compiler/dynamic.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -int main() -{ - Dalmation cairo; - Dog& doggie = cairo; - - if (dynamic_cast(&doggie)) - { - return 0; - } - - return 1; -} - diff --git a/compiler/elabbase.cpp b/compiler/elabbase.cpp deleted file mode 100644 index 776d1e22..00000000 --- a/compiler/elabbase.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// Template-qualified base class specifier -// BZ_TEMPLATE_QUALIFIED_BASE_CLASS - -class base1 { -public: - int bar() const - { return 1; } -}; - -class base2 { -public: - int bar() const - { return 0; } -}; - -template -struct base_trait { - typedef base1 
base; -}; - -template<> -struct base_trait { - typedef base2 base; -}; - -template -class weird : public base_trait::base { -public: - typedef typename base_trait::base base; - - int zowee() const - { return this->bar(); } -}; - -int main() -{ - weird z; - return z.zowee(); -} - diff --git a/compiler/elabret.cpp b/compiler/elabret.cpp deleted file mode 100644 index d5a7824f..00000000 --- a/compiler/elabret.cpp +++ /dev/null @@ -1,39 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// Template-qualified return type, necessary for type promotion on vectors -// BZ_TEMPLATE_QUALIFIED_RETURN_TYPE - -template -struct promote_trait { - typedef X T; -}; - - -template<>struct promote_trait { - typedef float T; -}; - -template -class Vector { -public: - Vector() { } -}; - -template -Vector::T> operator+(const Vector&, - const Vector&) -{ - return Vector::T>(); -} - -int main() -{ - Vector x; - Vector y; - Vector z = x + y; - return 0; -} - diff --git a/compiler/enumcmp2.cpp b/compiler/enumcmp2.cpp deleted file mode 100644 index eaf7bb44..00000000 --- a/compiler/enumcmp2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// BZ_ENUM_COMPUTATIONS_WITH_CAST - -struct foo { - enum { a = 5, b = 7, c = 2 }; -}; - -struct bar { - enum { a = 1, b = 6, c = 9 }; -}; - -template -struct Z { - enum { a = ((int)T1::a > (int)T2::a) ? (int)T1::a : (int)T2::b, - b = (int)T1::b + (int)T2::b, - c = ((int)T1::c * (int)T2::c + (int)T2::a + (int)T1::a) - }; -}; - -int main() -{ - if (((int)Z::a == 5) && ((int)Z::b == 13) - && ((int)Z::c == 24)) - return 0; - else - return 1; -} - diff --git a/compiler/enumcomp.cpp b/compiler/enumcomp.cpp deleted file mode 100644 index dc34b9f4..00000000 --- a/compiler/enumcomp.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// BZ_ENUM_COMPUTATIONS - -struct foo { - enum { a = 5, b = 7, c = 2 }; -}; - -struct bar { - enum { a = 1, b = 6, c = 9 }; -}; - -template -struct Z { - enum { a = (T1::a > T2::a) ? 
T1::a : T2::b, - b = T1::b + T2::b, - c = (T1::c * T2::c + T2::a + T1::a) - }; -}; - -int main() -{ - if (((int)Z::a == 5) && ((int)Z::b == 13) - && ((int)Z::c == 24)) - return 0; - else - return 1; -} - diff --git a/compiler/except.cpp b/compiler/except.cpp deleted file mode 100644 index 09ba54a3..00000000 --- a/compiler/except.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Exceptions -// BZ_EXCEPTIONS - -#include - -class foo { }; - -int divide(int a, int b) -{ - if (b == 0) - throw foo(); - - return a / b; -} - -int main() -{ - try { - divide(5,0); - } - catch(foo x) { - return 0; - } - - return 1; -} - diff --git a/compiler/explicit.cpp b/compiler/explicit.cpp deleted file mode 100644 index 7b90d4f5..00000000 --- a/compiler/explicit.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// 'explicit' keyword -// BZ_EXPLICIT - -class vector { - public: - explicit vector(double) - { } -}; - -int main() -{ - double c = 5.0; - vector x(c); - return 0; -} - diff --git a/compiler/fullspec.cpp b/compiler/fullspec.cpp deleted file mode 100644 index b852ae5a..00000000 --- a/compiler/fullspec.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Special syntax for full specialization - -template -class foo { -public: - int bar() const - { return 1; } -}; - -template<> -class foo { -public: - int bar() const - { return 0; } -}; - -int main() -{ - foo z; - return z.bar(); -} - diff --git a/compiler/getruse.cpp b/compiler/getruse.cpp deleted file mode 100644 index 8fed74dc..00000000 --- a/compiler/getruse.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int main() -{ - struct rusage resUsage; - getrusage(RUSAGE_SELF, &resUsage); - return 0; -} diff --git a/compiler/ieeemath.cpp b/compiler/ieeemath.cpp deleted file mode 100644 index dd7415f2..00000000 --- a/compiler/ieeemath.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// BZ_HAVE_IEEE_MATH - -#if !defined(__GNUC__) - #ifndef _ALL_SOURCE - #define _ALL_SOURCE - #endif - - #ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE - #endif - - #ifndef _XOPEN_SOURCE_EXTENDED - #define 
_XOPEN_SOURCE_EXTENDED 1 - #endif -#endif - -#include - -// finite and trunc have been removed: -// blitz-bugs/archive/0189.html - -int main() -{ - double x = 1.0; - // double y = 1.0; - - acosh(x); - asinh(x); - atanh(x); - cbrt(x); - erf(x); - erfc(x); - expm1(x); - // finite(x); - ilogb(x); - isnan(x); - j0(x); - j1(x); -// lgamma function has different interface under AIX in threaded mode -#if !(defined(_AIX) && defined(_THREAD_SAFE)) - lgamma(x); -#endif - logb(x); - log1p(x); - rint(x); - // trunc(x); - y0(x); - y1(x); - - return 0; -} - diff --git a/compiler/instant.cpp b/compiler/instant.cpp deleted file mode 100644 index a037fb2b..00000000 --- a/compiler/instant.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Explicit template instantiation -// BZ_EXPLICIT_TEMPLATE_INSTANTIATION - -template -class Vector { -public: - Vector() { } -}; - -template class Vector; - -int main() -{ - return 0; -} - diff --git a/compiler/mathscop.cpp b/compiler/mathscop.cpp deleted file mode 100644 index 9bda92bc..00000000 --- a/compiler/mathscop.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Where are those pesky math functions? -// BZ_MATH_FN_IN_NAMESPACE_STD - -#include - -namespace blitz { - double pow(double x, double y) - { return std::pow(x,y); } -}; - -int main() -{ - using namespace blitz; - double x = 1.0, y = 1.0; - blitz::pow(x,y); - return 0; -} - - diff --git a/compiler/membcnst.cpp b/compiler/membcnst.cpp deleted file mode 100644 index c92c64ff..00000000 --- a/compiler/membcnst.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Member constants - -class Foo { -public: - static const int value = 0; -}; - -const int Foo::value; - -int main() -{ - return Foo::value; -} - diff --git a/compiler/membtmp2.cpp b/compiler/membtmp2.cpp deleted file mode 100644 index 708db734..00000000 --- a/compiler/membtmp2.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Test member function templates #2: declaration of member templates outside -// the class. 
-// BZ_MEMBER_TEMPLATES_OUTSIDE_CLASS - -template -class Foo { - -public: - template - Foo operator=(const Foo& z); -}; - -template template -Foo Foo::operator=(const Foo& z) -{ - return Foo(); -} - -int main() -{ - Foo x; - Foo y; - x = y; - - return 0; -} - diff --git a/compiler/membtmpl.cpp b/compiler/membtmpl.cpp deleted file mode 100644 index 9ccfb5de..00000000 --- a/compiler/membtmpl.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Test member function templates -// BZ_MEMBER_TEMPLATES - -template -class Foo { - -public: - template - Foo operator=(const Foo&); -}; - -template template -Foo Foo::operator=(const Foo& z) -{ - return Foo(); -} - -int main() -{ - Foo x; - Foo y; - x = y; - - return 0; -} - diff --git a/compiler/mutable.cpp b/compiler/mutable.cpp deleted file mode 100644 index fe357844..00000000 --- a/compiler/mutable.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// 'mutable' keyword -// BZ_MUTABLE - -class num { - -public: - num(int z) - { - x_ = z; - numReads_ = 0; - } - - void set(int z) - { x_ = z; } - - int get() const - { - ++numReads_; - return x_; - } - -private: - int x_; - mutable int numReads_; -}; - -int main() -{ - num q(4); - q.set(5); - int k = q.get(); - q.get(); - return 0; -} - diff --git a/compiler/namespac.cpp b/compiler/namespac.cpp deleted file mode 100644 index fc66161c..00000000 --- a/compiler/namespac.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// BZ_NAMESPACES - -namespace computers { - -class keyboard { - public: - int getkey() const; -}; - -int keyboard::getkey() const -{ - return 0; -} - -} - -namespace music { - -class keyboard { - public: - void playNote(int note); -}; - -} - -namespace music { - -void keyboard::playNote(int note) -{ -} - -namespace foo { - template void Xeg(T) { } -} - -} - -using namespace computers; - -int main() -{ - keyboard x; - int z = x.getkey(); - - music::keyboard y; - y.playNote(z); - - using namespace music::foo; - Xeg(z); - - return 0; -} - diff --git a/compiler/nontype.cpp b/compiler/nontype.cpp deleted file mode 
100644 index 8f5d800c..00000000 --- a/compiler/nontype.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Test function templates with non-type parameters -// BZ_FUNCTION_NONTYPE_PARAMETERS - -template -class Foo { -}; - -template -void showFoo(const Foo& x) -{ -} - -int main() -{ - Foo z; - showFoo(z); - return 0; -} - - diff --git a/compiler/numlimit.cpp b/compiler/numlimit.cpp deleted file mode 100644 index ab8ea1dc..00000000 --- a/compiler/numlimit.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// numeric_limits class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - double e = numeric_limits::epsilon(); - return 0; -} - diff --git a/compiler/numtrait.cpp b/compiler/numtrait.cpp deleted file mode 100644 index b0a8db0e..00000000 --- a/compiler/numtrait.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - - -// BZ_USE_NUMTRAIT - -template -class SumType { -public: - typedef T_numtype T_sumtype; -}; - -template<> -class SumType { -public: - typedef int T_sumtype; -}; - -template -class Vector { -}; - -template -Vector::T_sumtype> -sum(Vector) -{ - return Vector::T_sumtype>(); -} - -int main() -{ - Vector x; - sum(x); - return 0; -} - diff --git a/compiler/oldfor.cpp b/compiler/oldfor.cpp deleted file mode 100644 index ef3c34a8..00000000 --- a/compiler/oldfor.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Old 'for' scoping rules - -int main() -{ - long z = 0; - - for (int i=0; i < 10; ++i) - { - z = z + i; - } - - z = i; - - return 0; -} - diff --git a/compiler/partial.cpp b/compiler/partial.cpp deleted file mode 100644 index 35fbc38c..00000000 --- a/compiler/partial.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Partial specialization -// BZ_PARTIAL_SPECIALIZATION - -template -class foo { -public: - enum bar { z = 0 }; -}; - -template -class foo { -public: - enum bar { z = 1 }; -}; - -template -class foo { -public: - enum bar { z = 2 }; -}; - -int main() -{ - if ((foo::z == 0) && (foo::z == 1) - && (foo::z == 2)) - return 0; - 
else - return 1; -} - diff --git a/compiler/porder.cpp b/compiler/porder.cpp deleted file mode 100644 index 848d401f..00000000 --- a/compiler/porder.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Partial ordering of member templates -// BZ_PARTIAL_ORDERING - -template -struct I { -}; - -template -struct A { - - int r; - - template - void operator()(T1, T2) - { r = 0; } - - template - void operator()(I, I) - { r = 1; } -}; - -int main() -{ - A x; - I<0> a; - I<1> b; - - x(a,b); - if (x.r != 1) - return 1; - - x(float(), double()); - if (x.r != 0) - return 1; - - return 0; -} - diff --git a/compiler/promote.cpp b/compiler/promote.cpp deleted file mode 100644 index 291d1d0b..00000000 --- a/compiler/promote.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef HAVE_TYPENAME - #define typename -#endif - -template -struct vec3 { T data_[3]; }; - -template -struct promote_trait { typedef T1 T_promote; }; -template <> -struct promote_trait { typedef double T_promote; }; - -template -vec3::T_promote> -operator+(const vec3& a, const vec3& b) { - vec3::T_promote> c; - c.data_[0] = a.data_[0] + b.data_[0]; - c.data_[1] = a.data_[1] + b.data_[1]; - c.data_[2] = a.data_[2] + b.data_[2]; - return c; -} - -int main() { - vec3 a,b; - vec3 c,d,e; - b=a+a; - d=c+c; - e=b+d; - return 0; -} diff --git a/compiler/reinterp.cpp b/compiler/reinterp.cpp deleted file mode 100644 index 3ecd46db..00000000 --- a/compiler/reinterp.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Reinterpret cast - -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -class Unrelated { -public: - Unrelated() { } - -}; - -void foo(Unrelated&) -{ } - -int main() -{ - Dalmation cairo; - Dog& dog = cairo; - Unrelated& eek = reinterpret_cast(dog); - foo(eek); - return 0; -} - diff --git a/compiler/restric2.cpp b/compiler/restric2.cpp deleted file mode 100644 index a177638a..00000000 --- 
a/compiler/restric2.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// egcs support for restrict, but as "__restrict__" -// BZ_NCEG_RESTRIC2 - -void add(int length, double * __restrict__ a, const double * __restrict__ b, - const double * __restrict__ c) -{ - for (int i=0; i < length; ++i) - a[i] = b[i] + c[i]; -} - -int main() -{ - double a[10], b[10], c[10]; - for (int i=0; i < 10; ++i) - { - a[i] = 0.; - b[i] = 0.; - c[i] = 0.; - } - - add(10,a,b,c); - return 0; -} - diff --git a/compiler/restrict.cpp b/compiler/restrict.cpp deleted file mode 100644 index ba566241..00000000 --- a/compiler/restrict.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Numerical C Extensions Group (NCEG) keyword 'restrict' -// BZ_NCEG_RESTRICT - -void add(int length, double * restrict a, const double * restrict b, - const double * restrict c) -{ - for (int i=0; i < length; ++i) - a[i] = b[i] + c[i]; -} - -int main() -{ - double a[10], b[10], c[10]; - for (int i=0; i < 10; ++i) - { - a[i] = 0.; - b[i] = 0.; - c[i] = 0.; - } - - add(10,a,b,c); - return 0; -} - diff --git a/compiler/rtti.cpp b/compiler/rtti.cpp deleted file mode 100644 index feb77b5b..00000000 --- a/compiler/rtti.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -int main() -{ - Dalmation z; - Dog* y = &z; - - if (typeid(*y) == typeid(Dalmation)) - { - return 0; - } - - return 1; -} - diff --git a/compiler/statcast.cpp b/compiler/statcast.cpp deleted file mode 100644 index 407b2593..00000000 --- a/compiler/statcast.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// static_cast - -#include - -class Dog { -public: - Dog() { } - virtual void fetch() = 0; -}; - -class Dalmation : public Dog { -public: - Dalmation() { } - virtual void fetch(); -}; - -void Dalmation::fetch() -{ -} - -void foo(Dalmation&) -{ } - -int main() -{ - Dalmation cairo; - Dog& dog = cairo; - - Dalmation& 
spotted = static_cast(dog); - foo(spotted); - - return 0; -} - diff --git a/compiler/std.cpp b/compiler/std.cpp deleted file mode 100644 index 0b6af372..00000000 --- a/compiler/std.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - return 0; -} - diff --git a/compiler/stl.cpp b/compiler/stl.cpp deleted file mode 100644 index d4f1ffbf..00000000 --- a/compiler/stl.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - list x; - x.push_back(5); - x.push_back(10); - - int sum = 0; - - for (list::iterator iter = x.begin(); - iter != x.end(); ++iter) - { - sum += *iter; - } - - if (sum != 15) - return 1; - - return 0; -} - diff --git a/compiler/sysvmath.cpp b/compiler/sysvmath.cpp deleted file mode 100644 index 0faab481..00000000 --- a/compiler/sysvmath.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// BZ_HAVE_SYSTEM_V_MATH - -#ifndef _ALL_SOURCE - #define _ALL_SOURCE -#endif - -#ifndef _XOPEN_SOURCE - #define _XOPEN_SOURCE -#endif - -#ifndef _XOPEN_SOURCE_EXTENDED - #define _XOPEN_SOURCE_EXTENDED 1 -#endif - -#include - -int main() -{ - double x = 1.0; - double y = 1.0; - - _class(x); - itrunc(x); - nearest(x); - rsqrt(x); - uitrunc(x); - - copysign(x,y); - drem(x,y); - hypot(x,y); - nextafter(x,y); - remainder(x,y); - scalb(x,y); - unordered(x,y); - - return 0; -} - diff --git a/compiler/tempkey.cpp b/compiler/tempkey.cpp deleted file mode 100644 index 2444676d..00000000 --- a/compiler/tempkey.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// 'template' keyword qualifier -// BZ_TEMPLATE_KEYWORD_QUALIFIER - -class Foo { -public: - Foo() { }; - template static T convert() { return T(); } -}; - -template -double f() { - return Foo::template convert(); -} - -int main() -{ - double z = f(); - return 0; -} - diff --git a/compiler/template.cpp b/compiler/template.cpp deleted file mode 100644 index 9c958ce0..00000000 --- 
a/compiler/template.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Basic templates - -template -class Vector { -public: - Vector() { } -}; - -template -void foo(const Vector& ) -{ } - -int main() -{ - Vector x; - Vector z; - foo(x); - foo(z); - return 0; -} - diff --git a/compiler/tempqmt.cpp b/compiler/tempqmt.cpp deleted file mode 100644 index 5f17ffaf..00000000 --- a/compiler/tempqmt.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#ifdef BZ_NO_TYPENAME - #define typename -#endif - -// BZ_TEMPLATE_SCOPED_ARGUMENT_MATCHING -template -class A { -public: - typedef X W; -}; - -template -class B { - -}; - -template -void operator+(B d1, typename Y::W d2) -{ -} - -int main() -{ - B > z; - z + 0.5f; // match +(B>, A::W) - // +(B>, float) ...? - return 0; -} - diff --git a/compiler/tempqual.cpp b/compiler/tempqual.cpp deleted file mode 100644 index 123b5ee9..00000000 --- a/compiler/tempqual.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Explicit template function qualification -// BZ_EXPLICIT_TEMPLATE_FUNCTION_QUALIFICATION - -template -class Vector { -public: - Vector() { } -}; - - -template -Vector to(const Vector&) -{ - return Vector(); -} - -int main() -{ - Vector x; - Vector y = to(x); - return 0; -} - diff --git a/compiler/temptemp.cpp b/compiler/temptemp.cpp deleted file mode 100644 index 303db27e..00000000 --- a/compiler/temptemp.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Templates as template arguments -// BZ_TEMPLATES_AS_TEMPLATE_ARGUMENTS - -template -class allocator { -public: - allocator() { }; -}; - -template class T_alloc> -class foo { -public: - foo() { } - -private: - T_alloc alloc_; -}; - -int main() -{ - foo x; - return 0; -} - diff --git a/compiler/typename.cpp b/compiler/typename.cpp deleted file mode 100644 index bb1046a0..00000000 --- a/compiler/typename.cpp +++ /dev/null @@ -1,12 +0,0 @@ -template -class X { -public: - X() { } -}; - -int main() -{ - X z; - return 0; -} - diff --git a/compiler/vac.icc b/compiler/vac.icc deleted file mode 100644 index 56280593..00000000 --- 
a/compiler/vac.icc +++ /dev/null @@ -1,8 +0,0 @@ -option -link(debug) -{ - target "bzjunk" - { - source type(cpp) "bzjunk.cpp" - } -} diff --git a/compiler/valarray.cpp b/compiler/valarray.cpp deleted file mode 100644 index b6b9970d..00000000 --- a/compiler/valarray.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// valarray class - -#include - -#ifndef BZ_NO_NAMESPACES -using namespace std; -#endif - -int main() -{ - valarray x(100); - return 0; -} - diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt deleted file mode 100644 index 7085c54c..00000000 --- a/doc/CMakeLists.txt +++ /dev/null @@ -1,91 +0,0 @@ -add_subdirectory(examples) -add_subdirectory(stencils) - -add_custom_target(blitz-doc-prep DEPENDS stencils doc-examples) -add_custom_target(blitz-doc) - -add_subdirectory(doxygen) - -set(STENCILS - backward11.texi backward12.texi backward21.texi backward22.texi backward31.texi backward32.texi backward41.texi backward42.texi - central12.texi central14.texi central22.texi central24.texi central32.texi central34.texi central42.texi central44.texi - forward11.texi forward12.texi forward21.texi forward22.texi forward31.texi forward32.texi forward41.texi forward42.texi - Laplacian2D4.texi Laplacian2D.texi) - -foreach(i ${STENCILS}) - set(TEXINFOS ${TEXINFOS} stencil/${i}) -endforeach() - -set(EXAMPLES - cast.texi debug.texi dump.texi fixed-point.texi fixed.texi io.texi outer.texi output.texi range.texi simple.texi slicing.texi - storage.texi strideslice.texi xor.texi) - -set(OUTPUTS - cast.out debug.out dump.out fixed.out io.out outer.out output.out - range.out simple.out slicing.out storage.out strideslice.out xor.out) - -foreach(i ${EXAMPLES} ${OUTPUTS}) - set(TEXINFOS ${TEXINFOS} examples/${i}) -endforeach() - -set(TEXINFOS - about.texi arrays-ctors.texi arrays-debug.texi arrays-expr.texi arrays-globals.texi arrays-indirect.texi arrays-intro.texi - arrays-io.texi arrays-members.texi arrays-multi.texi arrays-slicing.texi arrays-stencils.texi arrays-storage.texi 
arrays-types.texi - arrays-usertype.texi compiling.texi constants.texi download.texi faq.texi help.texi install.texi legal.texi numinquire.texi - parallel.texi platforms.texi random.texi tau.texi tinymatrix.texi tinyvector.texi tuning.texi copyright.texi) - -set(TEXI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/blitz.texi) -set(TEXI_PROG_ARGS -I ${CMAKE_CURRENT_SOURCE_DIR} -I ${CMAKE_CURRENT_BINARY_DIR}) - -find_program(MAKEINFO makeinfo) -mark_as_advanced(MAKEINFO) -if (MAKEINFO) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/blitz.info - COMMAND ${MAKEINFO} --no-split ${TEXI_PROG_ARGS} ${TEXI_SRC} - DEPENDS blitz-doc-prep ${TEXI_SRC}) - add_custom_target(info DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/blitz.info) - add_dependencies(blitz-doc info) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/blitz.info DESTINATION ${CMAKE_INSTALL_INFODIR}) -endif() - -find_program(TEXI2HTML texi2html - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) -mark_as_advanced(TEXI2HTML) - -if (TEXI2HTML) - set(HTML_DIR ${CMAKE_CURRENT_BINARY_DIR}/html) - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html) - add_custom_target(html ${TEXI2HTML} ${TEXI_PROG_ARGS} --split=chapter ${TEXI_SRC} - WORKING_DIRECTORY ${HTML_DIR} - DEPENDS blitz-doc-prep - SOURCES ${TEXI_SRC}) - add_dependencies(blitz-doc html) - install(DIRECTORY ${HTML_DIR} DESTINATION ${CMAKE_INSTALL_DOCDIR}) -endif() - -find_program(TEXI2PDF texi2pdf - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) -mark_as_advanced(TEXI2PDF) -find_program(PDFLATEX pdflatex - ${CYGWIN_INSTALL_PATH}/bin /bin /usr/bin /usr/local/bin /sbin) - -if (TEXI2PDF AND PDFLATEX) - find_path(TEXINFO_TEX_DIR "texinfo.tex" HINTS "/usr/share/texmf/tex/texinfo/") - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf - COMMAND TEXINPUTS=:${TEXINFO_TEX_DIR}:; ${TEXI2PDF} ${TEXI_PROG_ARGS} ${TEXI_SRC} - DEPENDS blitz-doc-prep ${TEXI_SRC}) - add_custom_target(pdf DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf) - 
add_dependencies(blitz-doc pdf) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/blitz.pdf DESTINATION ${CMAKE_INSTALL_DOCDIR}) -endif() - -set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES - "blitz.html;blitz.pdf;blitz.aux;blitz.cp;blitz.cps;blitz.fn;blitz.fns;blitz.ky;blitz.log;blitz.pg;blitz.toc;blitz.tp;blitz.vr") - -foreach(i AUTHORS COPYING COPYING.LESSER COPYRIGHT LEGAL LICENSE NEWS README.md) - set(BLITZ_INFORMATION ${BLITZ_INFORMATION} ${CMAKE_SOURCE_DIR}/${i}) -endforeach() - -# Install files - -install(FILES ${BLITZ_INFORMATION} DESTINATION ${CMAKE_INSTALL_DOCDIR}) diff --git a/doc/about.texi b/doc/about.texi deleted file mode 100644 index 950c28da..00000000 --- a/doc/about.texi +++ /dev/null @@ -1,15 +0,0 @@ - -@node about, platforms, , Introduction -@section About this document - -To use the Blitz++ library, you will need a compiler with near-ISO/ANSI C++ -syntax support (see the following section for possible compilers). -Information on what platforms are supported is available from -@uref{http://oonumerics.org/blitz/platforms/}. To download Blitz++, please -go to the download page at @uref{http://oonumerics.org/blitz/download/}. - -If you need to do something that Blitz++ doesn't support, see a possible -improvement, or notice an error in the documentation, please send a note to -one of the Blitz++ mailing lists (described later). - - diff --git a/doc/arrays-ctors.texi b/doc/arrays-ctors.texi deleted file mode 100644 index 66925cf6..00000000 --- a/doc/arrays-ctors.texi +++ /dev/null @@ -1,302 +0,0 @@ - -@node Array ctors, Array slicing, Array types, Arrays -@section Constructors - -@subsection Default constructor -@cindex Array default ctor - -@example -Array(); -Array(GeneralArrayStorage storage) -@end example - -The default constructor creates a C-style array of zero size. Any attempt -to access data in the array may result in a run-time error, because there -isn't any data to access! 
- -An optional argument specifies a storage order for the array. - -Arrays created using the default constructor can subsequently be given data -by the @code{resize()}, @code{resizeAndPreserve()}, or @code{reference()} -member functions. - -@subsection Creating an array from an expression - -@example -Array(expression...) -@end example - -You may create an array from an array expression. For example, - -@example -Array A(4,3), B(4,3); // ... -Array C(A*2.0+B); -@end example - -This is an explicit constructor (it will not be used to perform implicit -type conversions). The newly constructed array will have the same storage -format as the arrays in the expression. If arrays with different storage -formats appear in the expression, an error will result. (In this case, you -must first construct the array, then assign the expression to it). - -@subsection Constructors which take extent parameters -@cindex Array ctors with extent parameters - -@example -Array(int extent1); -Array(int extent1, int extent2); -Array(int extent1, int extent2, int extent3); -... -Array(int extent1, int extent2, int extent3, ..., int extent11) -@end example - -These constructors take arguments which specify the size of the array to be -constructed. You should provide as many arguments as there are dimensions -in the array.@footnote{If you provide fewer than @code{N_rank} arguments, -the missing arguments will be filled in using the last provided argument. -However, for code clarity, it makes sense to provide all @code{N_rank} -parameters.} - -An optional last parameter specifies a storage format: - -@example -Array(int extent1, GeneralArrayStorage storage); -Array(int extent1, int extent2, GeneralArrayStorage storage); -... 
-@end example - -For high-rank arrays, it may be convenient to use this constructor: -@cindex Array high-rank - -@example -Array(const TinyVector& extent); -Array(const TinyVector& extent, - GeneralArrayStorage storage); -@end example - -The argument @code{extent} is a vector containing the extent (length) of the -array in each dimension. The optional second parameter indicates a storage -format. Note that you can construct @code{TinyVector} objects on the -fly with the @code{shape(i1,i2,...)} global function. For example, -@code{Array A(shape(3,5))} will create a 3x5 array. - -A similar constructor lets you provide both a vector of base index values -(lbounds) and extents: - -@example -Array(const TinyVector& lbound, - const TinyVector& extent); -Array(const TinyVector& lbound, - const TinyVector& extent, - GeneralArrayStorage storage); -@end example - -The argument @code{lbound} is a vector containing the base index value (or -lbound) of the array in each dimension. The argument @code{extent} is a -vector containing the extent (length) of the array in each dimension. The -optional third parameter indicates a storage format. As with the above -constructor, you can use the @code{shape(i1,i2,...)} global function to -create the @code{lbound} and @code{extent} parameters. - -@subsection Constructors with Range arguments -@cindex Array ctor with Range args - -These constructors allow arbitrary bases (starting indices) to be set: - -@example -Array(Range r1); -Array(Range r1, Range r2); -Array(Range r1, Range r2, Range r3); -... -Array(Range r1, Range r2, Range r3, ..., Range r11); -@end example - -For example, this code: - -@example -Array A(Range(10,20), Range(20,30)); -@end example - -will create an 11x11 array whose indices are 10..20 and 20..30. An optional -last parameter provides a storage order: - -@example -Array(Range r1, GeneralArrayStorage storage); -Array(Range r1, Range r2, GeneralArrayStorage storage); -... 
-@end example - -@subsection Referencing another array -@cindex Array referencing another array - -This constructor makes a shared view of another array's data: -@cindex Array creating a reference of another array - -@example -Array(Array& array); -@end example - -After this constructor is used, both @code{Array} objects refer to the -@emph{same data}. Any changes made to one array will appear in the other -array. If you want to make a duplicate copy of an array, use the -@code{copy()} member function. - -@subsection Constructing an array from an expression - -Arrays may be constructed from expressions, which are described in -@ref{Array Expressions}. The syntax is: - -@example -Array(...array expression...); -@end example - -For example, this code creates an array B which contains the square roots of -the elements in A: - -@example -Array A(N,N); // ... -Array B(sqrt(A)); -@end example - -@subsection Creating an array from pre-existing data -@cindex Array creating from pre-existing data - -When creating an array using a pointer to already existing data, you have -three choices for how Blitz++ will handle the data. These choices are -enumerated by the enum type @code{preexistingMemoryPolicy}: -@cindex Array creating a reference of another array - -@example -enum preexistingMemoryPolicy @{ - duplicateData, - deleteDataWhenDone, - neverDeleteData -@}; -@end example -@findex preexistingMemoryPolicy -@findex duplicateData -@findex deleteDataWhenDone -@findex neverDeleteData - -If you choose @code{duplicateData}, Blitz++ will create an array object -using a copy of the data you provide. If you choose -@code{deleteDataWhenDone}, Blitz++ will not create a copy of the data; and -when no array objects refer to the data anymore, it will deallocate the data -using @code{delete []}. 
Note that to use @code{deleteDataWhenDone}, your -array data must have been allocated using the C++ @code{new} operator -- for -example, you cannot allocate array data using Fortran or @code{malloc}, then -create a Blitz++ array from it using the @code{deleteDataWhenDone} flag. -The third option is @code{neverDeleteData}, which means that Blitz++ will -never deallocate the array data. This means it is your responsibility -to determine when the array data is no longer needed, and deallocate it. -You should use this option for memory which has not been allocated using the -C++ @code{new} operator. - -These constructors create array objects from pre-existing data: - -@example -Array(T_numtype* dataFirst, TinyVector shape, - preexistingMemoryPolicy deletePolicy); -Array(T_numtype* dataFirst, TinyVector shape, - preexistingMemoryPolicy deletePolicy, - GeneralArrayStorage storage); -@end example - -The first argument is a pointer to the array data. It should point to the -element of the array which is stored first in memory. The second argument -indicates the shape of the array. You can create this argument using the -@code{shape()} function. For example: - -@example -double data[] = @{ 1, 2, 3, 4 @}; -Array A(data, shape(2,2), neverDeleteData); // Make a 2x2 array -@end example - -@findex shape() - -The @code{shape()} function takes N integer arguments and returns a -@code{TinyVector}. - -By default, Blitz++ arrays are row-major. If you want to work with data -which is stored in column-major order (e.g.
a Fortran array), use the second -version of the constructor: - -@cindex Array creating from Fortran arrays - -@example -Array B(data, shape(2,2), neverDeleteData, - FortranArray<2>()); -@end example - -This is a tad awkward, so Blitz++ provides the global object -@code{fortranArray} which will convert to an instance of -@code{GeneralArrayStorage}: - -@example -Array B(data, shape(2,2), neverDeleteData, fortranArray); -@end example - -Another version of this constructor allows you to pass an arbitrary -vector of strides: - -@example -Array(T_numtype* _bz_restrict dataFirst, TinyVector shape, - TinyVector stride, - preexistingMemoryPolicy deletePolicy, - GeneralArrayStorage storage = GeneralArrayStorage()) -@end example - -@subsection Interlacing arrays -@cindex Array interlacing -@findex interlaceArrays() -@findex allocateArrays() - -For some platforms, it can be advantageous to store a set of arrays -interlaced together in memory. Blitz++ provides support for this through -the routines @code{interlaceArrays()} and @code{allocateArrays()}. An -example: - -@example -Array A, B; -interlaceArrays(shape(10,10), A, B); -@end example - -The first parameter of @code{interlaceArrays()} is the shape for the arrays -(10x10). The subsequent arguments are the set of arrays to be interlaced -together. Up to 11 arrays may be interlaced. All arrays must store the -same data type and be of the same rank. In the above example, storage is -allocated so that @code{A(0,0)} is followed immediately by @code{B(0,0)} in -memory, which is followed by @code{A(0,1)} and @code{B(0,1)}, and so on. - -A related routine is @code{allocateArrays()}, which has identical syntax: - -@example -Array A, B; -allocateArrays(shape(10,10), A, B); -@end example - -Unlike @code{interlaceArrays()}, which always interlaces the arrays, the -routine @code{allocateArrays()} may or may not interlace them, depending on -whether interlacing is considered advantageous for your platform.
If the -tuning flag @code{BZ_INTERLACE_ARRAYS} is defined in -@code{}, then the arrays are interlaced. - -Note that the performance effects of interlacing are unpredictable: in some -situations it can be a benefit, and in most others it can slow your code -down substantially. You should only use @code{interlaceArrays()} after -running some benchmarks to determine whether interlacing is beneficial for -your particular algorithm and architecture. - -@subsection A note about reference counting -@cindex Array reference counting -@cindex reference counting - -Blitz++ arrays use reference counting. When you create a new array, a -memory block is allocated. The @code{Array} object acts like a handle for -this memory block. A memory block can be shared among multiple @code{Array} -objects -- for example, when you take subarrays and slices. The memory -block keeps track of how many @code{Array} objects are referring to it. -When a memory block is orphaned -- when no @code{Array} objects are -referring to it -- it automatically deletes itself and frees the allocated -memory. - diff --git a/doc/arrays-debug.texi b/doc/arrays-debug.texi deleted file mode 100644 index 70b69385..00000000 --- a/doc/arrays-debug.texi +++ /dev/null @@ -1,34 +0,0 @@ - -@node Array debug, Array members, Array slicing, Arrays -@section Debug mode -@cindex debugging mode -@cindex bounds checking -@cindex Array bounds checking - -The Blitz++ library has a debugging mode which is enabled by defining the -preprocessor symbol @code{BZ_DEBUG}. For most compilers, the command line -argument @code{-DBZ_DEBUG} should work. - -In debugging mode, your programs will run @emph{very slowly}. This is -because Blitz++ is doing lots of precondition checking and bounds checking. -When it detects something fishy, it will likely halt your program and -display an error message. 
- -For example, this program attempts to access an element of a 4x4 array which -doesn't exist: - -@smallexample -@include examples/debug.texi -@end smallexample - -When compiled with @code{-DBZ_DEBUG}, the out of bounds indices are detected -and an error message results: - -@smallexample -@include examples/debug.out -@end smallexample - -Precondition failures send their error messages to the standard error stream -(@code{cerr}). After displaying the error message, @code{assert(0)} is -invoked. - diff --git a/doc/arrays-expr.texi b/doc/arrays-expr.texi deleted file mode 100644 index 01de9440..00000000 --- a/doc/arrays-expr.texi +++ /dev/null @@ -1,1506 +0,0 @@ - -@cindex expression templates -@cindex Array expressions -@cindex Array no temporaries -@cindex temporaries -@cindex Array temporaries - -Array expressions in Blitz++ are implemented using the @emph{expression -templates} technique. Unless otherwise noted, expression evaluation will -never generate temporaries or multiple loops; an expression such as - -@example -Array A, B, C, D; // ... - -A = B + C + D; -@end example - -will result in code similar to - -@example -for (int i=A.lbound(firstDim); i <= A.ubound(firstDim); ++i) - A[i] = B[i] + C[i] + D[i]; -@end example - -@node Expression evaluation, Index placeholders, , Array Expressions -@section Expression evaluation order -@cindex Array expression evaluation order -@cindex expression evaluation order -@cindex order of expression evaluation -@cindex traversal order - -A commonly asked question about Blitz++ is what order it uses to evaluate -array expressions. For example, in code such as - -@example -A(Range(2,10)) = A(Range(1,9)) -@end example - -does the expression get evaluated at indices 1, 2, ..., 9 or at 9, 8, ..., -1? This makes a big difference to the result: in one case, the array will -be shifted to the right by one element; in the other case, most of the array -elements will be set to the value in @code{A(1)}. 
- -Blitz always selects the traversal order it thinks will be fastest. For 1D -arrays, this means it will go from beginning to the end of the array in -memory (see notes below). For multidimensional arrays, it will do one of -two things: - -@itemize @bullet - -@item try to go through the destination array in the order it is laid out -in memory (i.e.@: row-major for row-major arrays, column-major for -column-major arrays). - -@item if the expression is a stencil, Blitz will do tiling to improve cache -use. Under some circumstances blitz will even use a traversal based on a -hilbert curve (a fractal) for 3D arrays. - -@end itemize - -Because the traversal order is not always predictable, it is safest to put -the result in a new array if you are doing a stencil-style expression. -Blitz guarantees this will always work correctly. If you try to put the -result in one of the operands, you have to guess correctly which traversal -order blitz will choose. This is easy for the 1D case, but hard for the -multidimensional case. - -Some special notes about 1D array traversals: - -@itemize @bullet - -@item if your array is stored in reverse order, i.e.@: because of a -A.reverse(firstDim) or funny storage order, blitz will go through the array -from end to beginning in array coordinates, but from beginning to end in -memory locations. - -@item many compilers/architecture combinations are equally fast at reverse -order. But blitz has a specialized version for stride = +1, and it would be -wasteful to also specialize for the case stride = -1. So 1D arrays are -traversed from beginning to end (in memory storage order). - -@end itemize - -@section Expression operands -@cindex Array expression operands - -An expression can contain any mix of these operands: - -@itemize @bullet -@item An array of any type, so long as it is of the same rank. -Expressions which contain a mixture of array types are handled through the -type promotion mechanism described below. 
- -@item Scalars of type @code{int}, @code{float}, @code{double}, -@code{long double}, or @code{complex} - -@item Index placeholders, described below - -@item Other expressions (e.g. @code{A+(B+C)}) -@end itemize - -@section Array operands - -@unnumberedsubsec Using subarrays in an expression - -@cindex Array using subarrays in expressions - -Subarrays may be used in an expression. For example, this code example -performs a 5-point average on a two-dimensional array: - -@example -Array A(64,64), B(64,64); // ... -Range I(1,62), J(1,62); - -A(I,J) = (B(I,J) + B(I+1,J) + B(I-1,J) - + B(I,J+1) + B(I,J-1)) / 5; -@end example - -@unnumberedsubsec Mixing arrays with different storage formats - -@cindex Array expressions which mix arrays of different storage formats - -Arrays with different storage formats (for example, C-style and -Fortran-style) can be mixed in the same expression. Blitz++ will handle the -different storage formats automatically. However: - -@itemize @bullet - -@item Evaluation may be slower, since a different traversal order may be -used. - -@item If you are using index placeholders (see below) or reductions in -the expression, you may @strong{not} mix array objects with different -starting bases. - -@end itemize - -@section Expression operators -@cindex operators, array expressions -@cindex Array operators -@cindex Array expression operators - -These binary operators are supported: - -@example -+ - * / % > < >= <= == != && || ^ & | -@end example - -@strong{Caution:} operator @code{<<} and @code{>>} are reserved for use in input/output. -If you need a bit-shift operation on arrays, you may define one yourself; -see @ref{User et}. - -These unary operators are supported: - -@example -- ~ ! -@end example - -The operators @code{> < >= <= == != && || !} result in a bool-valued -expression. - -@cindex Array operators applied elementwise -All operators are applied @emph{elementwise}. 
- -@cindex Array requirements for using operators -You can only use operators which are well-defined for the number type stored -in the arrays. For example, bitwise XOR (@code{^}) is meaningful for -integers, so this code is all right: - -@example -Array A, B, C; // ... -A = B ^ C; -@end example - -Bitwise XOR is @emph{not} meaningful on floating point types, so this code -will generate a compiler error: - -@example -Array A, B, C; // ... -C = B ^ C; -@end example - -Here's the compiler error generated by KAI C++ for the above code: - -@example -"../../blitz/ops.h", line 85: error: expression must have integral or enum type - BZ_DEFINE_OP(BitwiseXor,^); - ^ - detected during: - instantiation of "blitz::BitwiseXor::T_numtype - blitz::BitwiseXor::apply(float, float)" at - line 210 of "../../blitz/arrayexpr.h" - instantiation of ... - . - . -@end example - -@cindex Array arrays of user type -If you are creating arrays using a type you have created yourself, you will -need to overload whatever operators you want to use on arrays. For example, -if I create a class @code{Polynomial}, and want to write code such as: - -@example -Array A, B, C; // ... -C = A * B; -@end example - -I would have to provide @code{operator*} for @code{Polynomial} by -implementing - -@example -Polynomial Polynomial::operator*(Polynomial); -@end example - -or - -@example -Polynomial operator*(Polynomial, Polynomial); -@end example - -@section Assignment operators - -@cindex Array assignment operators -These assignment operators are supported: - -@example -= += -= *= /= %= ^= &= |= >>= <<= -@end example - -An array object should appear on the left side of the operator.
The right -side can be: - -@itemize @bullet - -@item A constant (or literal) of type @code{T_numtype} - -@item An array of appropriate rank, possibly of a different numeric type - -@item An array expression, with appropriate rank and shape - -@end itemize - -@node Index placeholders, Math functions 1, Expression evaluation, Array Expressions -@section Index placeholders -@cindex Array index placeholders -@cindex index placeholders - -Blitz++ provides objects called @emph{index placeholders} which represent -array indices. They can be used directly in expressions. - -There is a distinct index placeholder type associated with each dimension of -an array. The types are called @code{firstIndex}, @code{secondIndex}, -@code{thirdIndex}, ..., @code{tenthIndex}, @code{eleventhIndex}. -@findex firstIndex -@findex secondIndex -@findex thirdIndex -@findex fourthIndex -Here's an example of using an index placeholder: - -@example -Array A(10); -firstIndex i; -A = i; -@end example - -This generates code which is similar to: - -@example -for (int i=0; i < A.length(); ++i) - A(i) = i; -@end example - -Here's an example which fills an array with a sampled sine wave: - -@example -Array A(16); -firstIndex i; - -A = sin(2 * M_PI * i / 16.); -@end example - -If your destination array has rank greater than 1, you may use -multiple index placeholders: - -@cindex index placeholders multiple - -@example -// Fill a two-dimensional array with a radially -// symmetric, decaying sinusoid - -// Create the array -int N = 64; -Array F(N,N); - -// Some parameters -float midpoint = (N-1)/2.; -int cycles = 3; -float omega = 2.0 * M_PI * cycles / double(N); -float tau = - 10.0 / N; - -// Index placeholders -firstIndex i; -secondIndex j; - -// Fill the array -F = cos(omega * sqrt(pow2(i-midpoint) + pow2(j-midpoint))) - * exp(tau * sqrt(pow2(i-midpoint) + pow2(j-midpoint))); -@end example - -Here's a plot of the resulting array: - -@center @image{sinsoid} -@center Array filled using an index 
placeholder expression. - -You can use index placeholder expressions in up to 11 dimensions. -Here's a three dimensional example: - -@example -// Fill a three-dimensional array with a Gaussian function -Array A(16,16,16); -firstIndex i; -secondIndex j; -thirdIndex k; -float midpoint = 15/2.; -float c = - 1/3.0; -A = exp(c * (sqr(i-midpoint) + sqr(j-midpoint) - + sqr(k-midpoint))); -@end example - -You can mix array operands and index placeholders: - -@example -Array A(5), B(5); -firstIndex i; - -A = 0, 1, 1, 0, 2; -B = i * A; // Results in [ 0, 1, 2, 0, 8 ] -@end example - -For your convenience, there is a namespace within blitz -called @code{tensor} which declares all the index placeholders: - -@cindex tensor namespace -@cindex @code{i} (index placeholder) -@cindex @code{j} (index placeholder) -@cindex @code{k} (index placeholder) -@cindex @code{l} (index placeholder) -@cindex @code{m} (index placeholder) -@cindex @code{n} (index placeholder) - -@example -namespace blitz @{ - namespace tensor @{ - firstIndex i; - secondIndex j; - thirdIndex k; - ... - eleventhIndex t; - @} -@} -@end example - -So instead of declaring your own index placeholder objects, -you can just say - -@findex blitz::tensor namespace - -@example -using namespace blitz::tensor; -@end example - -when you would like to use them. Alternatively, you can just preface all the -index placeholders with @code{tensor::}, for example: - -@example -A = sin(2 * M_PI * tensor::i / 16.); -@end example - -This will make your code more readable, since it is immediately clear that -@code{i} is an index placeholder, rather than a scalar value. - -@section Type promotion -@cindex type promotion -@cindex Array type promotion - -When operands of different numeric types are used in an expression, the -result gets promoted according to the usual C-style type promotion. For -example, the result of adding an @code{Array<int>} to an -@code{Array<float>} will be promoted to @code{float}.
Generally, the -result is promoted to whichever type has greater precision. - -@unnumberedsubsec Type promotion for user-defined types - -@cindex type promotion for user-defined types -@cindex Array type promotion for user-defined types - -The rules for type promotion of user-defined types (or types from another -library) are a bit complicated. Here's how a pair of operand types are -promoted: - -@itemize @bullet - -@item If both types are intrinsic (e.g. bool, int, float) then type -promotion follows the standard C rules. This generally means that the -result will be promoted to whichever type has greater precision. In -Blitz++, these rules have been extended to incorporate -@code{complex}, @code{complex}, and @code{complex}. - -@item If one of the types is intrinsic (or complex), and the other is a -user-defined type, then the result is promoted to the user-defined type. - -@item If both types are user-defined, then the result is promoted to -whichever type requires more storage space (as determined by -@code{sizeof()}). The rationale is that more storage space probably -indicates more precision. - -@end itemize - -If you wish to alter the default type promotion rules above, you have two -choices: - -@itemize @bullet - -@findex promote_trait - -@item If the type promotion behaviour isn't dependent on the type of -operation performed, then you can provide appropriate specializations for -the class @code{promote_trait} which is declared in -@code{}. - -@item If type promotion does depend on the type of operation, then you -will need to specialize the appropriate function objects in -@code{}. - -@end itemize - -Note that you can do these specializations in your own header files (you -don't have to edit @file{promote.h} or @file{ops.h}). - -@unnumberedsubsec Manual casts - -@cindex casts -@cindex Array casts - -There are some inconvenient aspects of C-style type promotion. For example, -when you divide two integers in C, the result gets truncated. 
The same -problem occurs when dividing two integer arrays in Blitz++: - -@example -Array A(4), B(4); -Array C(4); - -A = 1, 2, 3, 5; -B = 2, 2, 2, 7; - -C = A / B; // Result: [ 0 1 1 0 ] -@end example - -The usual solution to this problem is to cast one of the operands to a -floating type. For this purpose, Blitz++ provides a function -@code{cast(expr,type)} which will cast the result of @emph{expr} as -@emph{type}: - -@findex cast() - -@example -C = A / cast(B, float()); // Result: [ 0.5 1 1.5 0.714 ] -@end example - -The first argument to @code{cast()} is an array or expression. The second -argument is a dummy object of the type to which you want to cast. Once -compilers support templates more thoroughly, it will be possible to use this -cast syntax: - -@example -C = A / cast(B); -@end example - -But this is not yet supported. - -@node Math functions 1, Math functions 2, Index placeholders, Array Expressions -@section Single-argument math functions - -All of the functions described in this section are @emph{element-wise}. For -example, this code-- - -@example -Array A, B; // -A = sin(B); -@end example - -results in @code{A(i,j) = sin(B(i,j))} for all (i,j). - -@unnumberedsubsec ANSI C++ math functions - -These math functions are available on all platforms: - -@cindex math functions -@cindex complex math functions - -@table @code -@item abs() -@findex abs() -Absolute value - -@item acos() -@findex acos() -Inverse cosine. For real arguments, the return value is in the range -@math{[0, \pi]}. - -@item arg() -@findex arg() -Argument of a complex number (@code{atan2(Im,Re)}). - -@item asin() -@findex asin() -Inverse sine. For real arguments, the return value is in the range -@math{[-\pi/2, \pi/2]}. - -@item atan() -@findex atan() -Inverse tangent. For real arguments, the return value is in the range -@math{[-\pi/2, \pi/2]}. See also @code{atan2()} in section -@ref{Math functions 2}. 
- -@item ceil() -@findex ceil() -Ceiling function: smallest floating-point integer value not less than the -argument. - -@item cexp() -@findex cexp() -Complex exponential; same as @code{exp()}. - -@item conj() -@findex conj() -Conjugate of a complex number. - -@item cos() -@findex cos() -Cosine. Works for @code{complex}. - -@item cosh() -@findex cosh() -Hyperbolic cosine. Works for @code{complex}. - -@item csqrt() -@findex csqrt() -Complex square root; same as @code{sqrt()}. - -@item exp() -@findex exp() -Exponential. Works for @code{complex}. - -@item fabs() -@findex fabs() -Same as @code{abs()}. - -@item floor() -@findex floor() -Floor function: largest floating-point integer value not greater than the -argument. - -@item log() -@findex log() -Natural logarithm. Works for @code{complex}. - -@item log10() -@findex log10() -Base 10 logarithm. Works for @code{complex}. - -@item pow2(), pow3(), pow4(), pow5(), pow6(), pow7(), pow8() -@findex pow2() -@findex pow3() -@findex pow?() -These functions compute an integer power. They expand to a series of -multiplications, so they can be used on any type for which multiplication is -well-defined. - -@item sin() -@findex sin() -Sine. Works for @code{complex}. - -@item sinh() -@findex sinh() -Hyperbolic sine. Works for @code{complex}. - -@item sqr() -@findex sqr() -Same as @code{pow2()}. Computes @code{x*x}. Works for @code{complex}. - -@item sqrt() -@findex sqrt() -Square root. Works for @code{complex}. - -@item tan() -@findex tan() -Tangent. Works for @code{complex}. - -@item tanh() -@findex tanh() -Hyperbolic tangent. Works for @code{complex}. -@end table - -@unnumberedsubsec IEEE/System V math functions - -@cindex IEEE math functions -@cindex System V math functions -@findex libm.a -@findex libmsaa.a - -These functions are only available on platforms which provide the IEEE Math -library (libm.a) and/or System V Math Library (libmsaa.a). 
Apparently not -all platforms provide all of these functions, so what you can use on your -platform may be a subset of these. If you choose to use one of these -functions, be aware that you may be limiting the portability of your code. - -@findex XOPEN_SOURCE -@findex XOPEN_SOURCE_EXTENDED - -On some platforms, the preprocessor symbols @code{_XOPEN_SOURCE} and/or -@code{_XOPEN_SOURCE_EXTENDED} need to be defined to use these functions. -These symbols can be enabled by compiling with -@code{-DBZ_ENABLE_XOPEN_SOURCE}. (In previous versions of Blitz++, -@code{_XOPEN_SOURCE} and @code{_XOPEN_SOURCE_EXTENDED} were declared by -default. This was found to cause too many problems, so users must manually -enable them with @code{-DBZ_ENABLE_XOPEN_SOURCE}.) - -In the current version, Blitz++ divides these functions into two groups: -IEEE and System V. This distinction is probably artificial. If one of the -functions in a group is missing, Blitz++ won't allow you to use any of them. -You can see the division of these functions in the files -@file{Blitz++/compiler/ieeemath.cpp} and -@file{Blitz++/compiler/sysvmath.cpp}. This arrangement is unsatisfactory -and will probably change in a future version. - -You may have to link with @code{-lm} and/or @code{-lmsaa} to use these -functions. - -None of these functions are available for @code{complex}. - -@table @code -@item acosh() -@findex acosh() -Inverse hyperbolic cosine - -@item asinh() -@findex asinh() -Inverse hyperbolic sine - -@item atanh() -@findex atanh() -Inverse hyperbolic tangent - -@item _class() -@findex _class() -Classification of floating point values.
The return type is integer and -will be one of: - - @table @code - @item FP_PLUS_NORM -@findex FP_PLUS_NORM - Positive normalized, nonzero - - @item FP_MINUS_NORM -@findex FP_MINUS_NORM - Negative normalized, nonzero - - @item FP_PLUS_DENORM -@findex FP_PLUS_DENORM - Positive denormalized, nonzero - - @item FP_MINUS_DENORM -@findex FP_MINUS_DENORM - Negative denormalized, nonzero - - @item FP_PLUS_ZERO -@findex FP_PLUS_ZERO - +0.0 - - @item FP_MINUS_ZERO -@findex FP_MINUS_ZERO - -0.0 - - @item FP_PLUS_INF -@findex FP_PLUS_INF - Positive infinity - - @item FP_MINUS_INF -@findex FP_MINUS_INF - Negative infinity - - @item FP_NANS -@findex FP_NANS - Signalling Not a Number (NaNS) - - @item FP_NANQ -@findex FP_NANQ - Quiet Not a Number (NaNQ) - @end table - -@item cbrt() -@findex cbrt() -Cubic root - -@item expm1() -@findex expm1() -Computes exp(x)-1 - -@item erf() -@findex erf() -Computes the error function: -@tex -$$ {\rm erf}(x) = {2\over\sqrt\pi}\int_{0}^{x} e^{-t^2} dt $$ -@end tex -@html -erf(x) = 2/sqrt(Pi) * integral(exp(-t^2), t=0..x) -@end html -@ifnottex -@ifnothtml -@math{@r{erf}(x) = 2/@r{sqrt}(Pi) * @r{integral}(@r{exp}(-t^2), t=0..x)} -@end ifnothtml -@end ifnottex - -Note that for large values of the parameter, calculating can result in -extreme loss of accuracy. Instead, use @code{erfc()}. - -@item erfc() -@findex erfc() -Computes the complementary error function @math{@r{erfc}(x) = 1 - @r{erf}(x)}. - -@item finite() -@findex finite() -Returns a nonzero integer if the parameter is a finite number (i.e.@: not -+INF, -INF, NaNQ or NaNS). - -@item ilogb() -@findex ilogb() -Returns an integer which is equal to the unbiased exponent of -the parameter. - -@item blitz_isnan() -@findex blitz_isnan() -@findex isnan() -Returns a nonzero integer if the parameter is NaNQ or -NaNS (quiet or signalling Not a Number). - -@item itrunc() -@findex itrunc() -Round a floating-point number to a signed integer. 
Returns -the nearest signed integer to the parameter in the direction of 0. - -@item j0() -@findex j0() -@cindex Bessel functions -Bessel function of the first kind, order 0. - -@item j1() -@findex j1() -Bessel function of the first kind, order 1. - -@item lgamma() -@findex lgamma() -@cindex Gamma function -Natural logarithm of the gamma function. The gamma function -is defined as: -@tex -$$ {\rm Gamma}(x) = \int_0^\infty e^{-t}t^{x-1} dt $$ -@end tex -@html -Gamma(x) = integral(e^(-t) * t^(x-1), t=0..infinity)) -@end html -@ifnottex -@ifnothtml -@math{@r{Gamma}(x) = @r{integral}(e^(-t) * t^(x-1), t=0..@r{infinity}))} -@end ifnothtml -@end ifnottex - -@item logb() -@findex logb() -Returns a floating-point double that is equal to the unbiased -exponent of the parameter. - -@item log1p() -@findex log1p() -Calculates log(1+x), where x is the parameter. - -@item nearest() -@findex nearest() -Returns the nearest floating-point integer value to the -parameter. If the parameter is exactly halfway between two integer values, -an even value is returned. - -@item rint() -@findex rint() -@cindex rounding -Rounds the parameter and returns a floating-point integer value. Whether -@code{rint()} rounds up or down or to the nearest integer depends on the -current floating-point rounding mode. If you haven't altered the rounding -mode, @code{rint()} should be equivalent to @code{nearest()}. If rounding -mode is set to round towards +INF, @code{rint()} is equivalent to -@code{ceil()}. If the mode is round toward -INF, @code{rint()} is -equivalent to @code{floor()}. If the mode is round toward zero, -@code{rint()} is equivalent to @code{trunc()}. - -@item rsqrt() -@findex rsqrt() -Reciprocal square root. - -@item uitrunc() -@findex uitrunc() -Returns the nearest unsigned integer to the parameter in the -direction of zero. - -@item y0() -@findex y0() -Bessel function of the second kind, order 0. - -@item y1() -@findex y1() -Bessel function of the second kind, order 1. 
-@end table - -There may be better descriptions of these functions in your -system man pages. - -@node Math functions 2, User et, Math functions 1, Array Expressions -@section Two-argument math functions - -The math functions described in this section take two arguments. -Most combinations of these types may be used as arguments: - -@itemize @bullet -@item An Array object -@item An Array expression -@item An index placeholder -@item A scalar of type @code{float}, @code{double}, @code{long double}, -or @code{complex} -@end itemize - -@unnumberedsubsec ANSI C++ math functions - -These math functions are available on all platforms, and work for -complex numbers. - -@cindex math functions -@cindex complex math functions - -@table @code -@item atan2(x,y) -@findex atan2() -Inverse tangent of (y/x). The signs of both parameters -are used to determine the quadrant of the return value, which is in the -range @math{[-\pi, \pi]}. Works for @code{complex}. - -@item blitz::polar(r,t) -@findex polar() -Computes ; i.e.@: converts polar-form to -Cartesian form complex numbers. The @code{blitz::} scope qualifier is -needed to disambiguate the ANSI C++ function template @code{polar(T,T)}. -This qualifier will hopefully disappear in a future version. - -@item pow(x,y) -@findex pow() -Computes x to the exponent y. Works for @code{complex}. -@end table - -@unnumberedsubsec IEEE/System V math functions - -See the notes about IEEE/System V math functions in the previous section. -None of these functions work for complex numbers. They will all cast their -arguments to double precision. - -@table @code -@item copysign(x,y) -@findex copysign() -Returns the x parameter with the same sign as the y parameter. - -@item drem(x,y) -@findex drem() -@cindex remainder, floating point @code{drem()} -Computes a floating point remainder. The return value r is equal to r = x - -n * y, where n is equal to @code{nearest(x/y)} (the nearest integer to x/y). 
-The return value will lie in the range [ -y/2, +y/2 ]. If y is zero or x is -+INF or -INF, NaNQ is returned. - -@item fmod(x,y) -@findex fmod() -@cindex modulo, floating point @code{fmod()} -Computes a floating point modulo remainder. The return value r is equal to -r = x - n * y, where n is selected so that r has the same sign as x and -magnitude less than abs(y). In other words, if x > 0, r is in the range [0, -|y|], and if x < 0, r is in the range [-|y|, 0]. - -@item hypot(x,y) -@findex hypot() -Computes sqrt(x^2 + y^2) so that underflow does not occur and overflow occurs only if the -final result warrants it. - -@item nextafter(x,y) -@findex nextafter() -Returns the next representable number after x in the direction of y. - -@item remainder(x,y) -@findex remainder() -Equivalent to drem(x,y). - -@item scalb(x,y) -@findex scalb() -Calculates x * 2^y. - -@item unordered(x,y) -@findex unordered() -Returns a nonzero value if a floating-point comparison between x and y would -be unordered. Otherwise, it returns zero. -@end table - -@node User et, Where expr, Math functions 2, Array Expressions -@section Declaring your own math functions on arrays - -@cindex math functions declaring your own -@cindex Array declaring your own math functions on - -There are four macros which make it easy to turn your own scalar functions -into functions defined on arrays. They are: - -@findex BZ_DECLARE_FUNCTION - -@example -BZ_DECLARE_FUNCTION(f) // 1 -BZ_DECLARE_FUNCTION_RET(f,return_type) // 2 -BZ_DECLARE_FUNCTION2(f) // 3 -BZ_DECLARE_FUNCTION2_RET(f,return_type) // 4 -@end example - -Use version 1 when you have a function which takes one argument and returns -a result of the same type. For example: - -@example -#include - -using namespace blitz; - -double myFunction(double x) -@{ - return 1.0 / (1 + x); -@} - -BZ_DECLARE_FUNCTION(myFunction) - -int main() -@{ - Array A(4,4), B(4,4); // ... 
- B = myFunction(A); -@} -@end example - -Use version 2 when you have a one argument function whose return type is -different than the argument type, such as - -@example -int g(double x); -@end example - -Use version 3 for a function which takes two arguments and returns a result -of the same type, such as: - -@example -double g(double x, double y); -@end example - -Use version 4 for a function of two arguments which returns a different -type, such as: - -@example -int g(double x, double y); -@end example - -@section Tensor notation - -@cindex tensor notation -@cindex Array tensor notation - -Blitz++ arrays support a tensor-like notation. Here's an example of -real-world tensor notation: -@tex -$$ A^{ijk} = B^{ij} C^k $$ -@end tex -@html -
- ijk    ij k
-A    = B  C
-
-@end html -@ifnottex -@ifnothtml -@example - ijk ij k -A = B C -@end example -@end ifnothtml -@end ifnottex - -@math{A} is a rank 3 tensor (a three dimensional array), @math{B} is a rank -2 tensor (a two dimensional array), and @math{C} is a rank 1 tensor (a one -dimensional array). The above expression sets -@code{A(i,j,k) = B(i,j) * C(k)}. - -To implement this product using Blitz++, we'll need the arrays and some -index placeholders: - -@cindex index placeholders used for tensor notation - -@example -Array A(4,4,4); -Array B(4,4); -Array C(4); - -firstIndex i; // Alternately, could just say -secondIndex j; // using namespace blitz::tensor; -thirdIndex k; -@end example - -Here's the Blitz++ code which is equivalent to the tensor expression: - -@example -A = B(i,j) * C(k); -@end example - -The index placeholder arguments tell an array how to map its dimensions onto -the dimensions of the destination array. For example, here's some -real-world tensor notation: -@tex -$$ C^{ijk} = A^{ij} x^{k} - A^{jk} y^{i} $$ -@end tex -@html -
- ijk    ij k    jk i
-C    = A  x  - A  y
-
-@end html -@ifnottex -@ifnothtml -@example - ijk ij k jk i -C = A x - A y -@end example -@end ifnothtml -@end ifnottex - -In Blitz++, this would be coded as: - -@example -using namespace blitz::tensor; - -C = A(i,j) * x(k) - A(j,k) * y(i); -@end example - -This tensor expression can be visualized in the following way: - -@center @image{tensor1} -@center Examples of array indexing, subarrays, and slicing. - -Here's an example which computes an outer product of two one-dimensional -arrays: -@cindex outer product -@cindex kronecker product -@cindex tensor product - -@smallexample -@include examples/outer.texi -@end smallexample - -And the output: - -@smallexample -@include examples/outer.out -@end smallexample - -Index placeholders can @emph{not} be used on the left-hand side of an -expression. If you need to reorder the indices, you must do this on the -right-hand side. - -In real-world tensor notation, repeated indices imply a contraction (or -summation). For example, this tensor expression computes a matrix-matrix -product: -@tex -$$ C^{ij} = A^{ik} B^{kj} $$ -@end tex -@html -
- ij    ik  kj
-C   = A   B
-
-@end html -@ifnottex -@ifnothtml -@example - ij ik kj -C = A B -@end example -@end ifnothtml -@end ifnottex - -The repeated k index is interpreted as meaning -@tex -$$ c_{ij} = \sum_{k} a_{ik} b_{kj} $$ -@end tex -@html -
-c    = sum of (a   * b  ) over k
- ij             ik    kj
-
-@end html -@ifnottex -@ifnothtml -@example -c = sum of @{a * b @} over k - ij ik kj -@end example -@end ifnothtml -@end ifnottex - -@cindex contraction -@cindex tensor contraction - -In Blitz++, repeated indices do @emph{not} imply contraction. If you want -to contract (sum along) an index, you must use the @code{sum()} function: - -@example -Array A, B, C; // ... -firstIndex i; -secondIndex j; -thirdIndex k; - -C = sum(A(i,k) * B(k,j), k); -@end example - -The @code{sum()} function is an example of an @emph{array reduction}, -described in the next section. - -Index placeholders can be used in any order in an expression. This example -computes a kronecker product of a pair of two-dimensional arrays, and -permutes the indices along the way: - -@example -Array A, B; // ... -Array C; // ... -fourthIndex l; - -C = A(l,j) * B(k,i); -@end example - -This is equivalent to the tensor notation -@tex -$$ C^{ijkl} = A^{lj} B^{ki} $$ -@end tex -@html -
- ijkl    lj ki
-C     = A  B
- 
-@end html -@ifnottex -@ifnothtml -@example - ijkl lj ki -C = A B -@end example -@end ifnothtml -@end ifnottex - -Tensor-like notation can be mixed with other array notations: - -@example -Array A, B; // ... -Array C; // ... - -C = cos(A(l,j)) * sin(B(k,i)) + 1./(i+j+k+l); -@end example - -@cindex tensor notation efficiency issues -An important efficiency note about tensor-like notation: the right-hand side -of an expression is @emph{completely evaluated} for @emph{every} element in -the destination array. For example, in this code: - -@example -Array x(4), y(4); -Array A(4,4): - -A = cos(x(i)) * sin(y(j)); -@end example - -The resulting implementation will look something like this: - -@example -for (int n=0; n < 4; ++n) - for (int m=0; m < 4; ++m) - A(n,m) = cos(x(n)) * sin(y(m)); -@end example - -The functions @code{cos} and @code{sin} will be invoked sixteen times each. -It's possible that a good optimizing compiler could hoist the @code{cos} -evaluation out of the inner loop, but don't hold your breath -- there's a -lot of complicated machinery behind the scenes to handle tensor notation, -and most optimizing compilers are easily confused. In a situation like the -above, you are probably best off manually creating temporaries for -@code{cos(x)} and @code{sin(y)} first. - -@section Array reductions -@cindex Array reductions -@cindex reductions - -Currently, Blitz++ arrays support two forms of reduction: - -@itemize @bullet - -@item Reductions which transform an array into a scalar (for example, -summing the elements). These are referred to as @strong{complete -reductions}. - -@item Reducing an N dimensional array (or array expression) to an N-1 -dimensional array expression. These are called @strong{partial reductions}. - -@end itemize - -@cindex Array reductions complete -@cindex complete reductions -@cindex reductions complete - -@section Complete reductions - -Complete reductions transform an array (or array expression) into -a scalar. 
Here are some examples: - -@example -Array A(3,3); -A = 0, 1, 2, - 3, 4, 5, - 6, 7, 8; -cout << sum(A) << endl // 36 - << min(A) << endl // 0 - << count(A >= 4) << endl; // 5 -@end example - -Here are the available complete reductions: - -@table @code -@item sum() -@cindex @code{sum()} reduction -Summation (may be promoted to a higher-precision type) - -@item product() -@cindex @code{product()} reduction -Product - -@item mean() -@cindex @code{mean()} reduction -Arithmetic mean (promoted to floating-point type if necessary) - -@item min() -@cindex @code{min()} reduction -Minimum value - -@item max() -@cindex @code{max()} reduction -Maximum value - -@item minmax() -@cindex @code{minmax()} reduction -Simultaneous minimum and maximum value (returns a value of type MinMaxValue) - -@item minIndex() -@cindex @code{minIndex()} reduction -Index of the minimum value (@code{TinyVector}) - -@item maxIndex() -@cindex @code{maxIndex()} reduction -Index of the maximum value (@code{TinyVector}) - -@item count() -@cindex @code{count()} reduction -Counts the number of times the expression is logical true (@code{int}) - -@item any() -@cindex @code{any()} reduction -True if the expression is true anywhere (@code{bool}) - -@item all() -@cindex @code{all()} reduction -True if the expression is true everywhere (@code{bool}) -@end table - -@strong{Caution:} @code{minIndex()} and @code{maxIndex()} return TinyVectors, -even when the rank of the array (or array expression) is 1. - -Reductions can be combined with @code{where} expressions (@ref{Where expr}) -to reduce over some part of an array. For example, @code{sum(where(A > 0, -A, 0))} sums only the positive elements in an array. - -@section Partial Reductions - -@cindex Array reductions partial -@cindex partial reductions -@cindex reductions partial - -Here's an example which computes the sum of each row of a two-dimensional -array: - -@example -Array A; // ... -Array rs; // ... 
-firstIndex i; -secondIndex j; - -rs = sum(A, j); -@end example - -The reduction @code{sum()} takes two arguments: - -@itemize @bullet - -@item The first argument is an array or array expression. - -@item The second argument is an index placeholder indicating the -dimension over which the reduction is to occur. - -@end itemize - -Reductions have an @strong{important restriction}: It is currently only -possible to reduce over the @emph{last} dimension of an array or array -expression. Reducing a dimension other than the last would require Blitz++ -to reorder the dimensions to fill the hole left behind. For example, in -order for this reduction to work: - -@example -Array A; // ... -Array B; // ... -secondIndex j; - -// Reduce over dimension 2 of a 3-D array? -B = sum(A, j); -@end example - -Blitz++ would have to remap the dimensions so that the third dimension -became the second. It's not currently smart enough to do this. - -However, there is a simple workaround which solves some of the problems -created by this limitation: you can do the reordering manually, prior to the -reduction: - -@example -B = sum(A(i,k,j), k); -@end example - -Writing @code{A(i,k,j)} interchanges the second and third dimensions, -permitting you to reduce over the second dimension. 
Here's a list of the -reduction operations currently supported: - -@table @code -@item sum() -Summation - -@item product() -Product - -@item mean() -Arithmetic mean (promoted to floating-point type if necessary) - -@item min() -Minimum value - -@item max() -Maximum value - -@item minIndex() -Index of the minimum value (int) - -@item maxIndex() -Index of the maximum value (int) - -@item count() -Counts the number of times the expression is logical true (int) - -@item any() -True if the expression is true anywhere (bool) - -@item all() -True if the expression is true everywhere (bool) - -@item first() -First index at which the expression is logical true (int); if the expression -is logical true nowhere, then @code{tiny(int())} (INT_MIN) is returned. - -@item last() -Last index at which the expression is logical true (int); if the expression -is logical true nowhere, then @code{huge(int())} (INT_MAX) is returned. -@end table - -The reductions @code{any()}, @code{all()}, and @code{first()} have -short-circuit semantics: the reduction will halt as soon as the answer is -known. For example, if you use @code{any()}, scanning of the expression -will stop as soon as the first true value is encountered. 
- -To illustrate, here's an example: - -@example -Array A(4,4); - -A = 3, 8, 0, 1, - 1, -1, 9, 3, - 2, -5, -1, 1, - 4, 3, 4, 2; - -Array z(4); -firstIndex i; -secondIndex j; - -z = sum(A(j,i), j); -@end example - -The array @code{z} now contains the sum of @code{A} along each column: - -@example -[ 10 5 12 7 ] -@end example - -This table shows what the result stored in @code{z} would be if -@code{sum()} were replaced with other reductions: - -@example -sum [ 10 5 12 7 ] -mean [ 2.5 1.25 3 1.75 ] -min [ 1 -5 -1 1 ] -minIndex [ 1 2 2 0 ] -max [ 4 8 9 3 ] -maxIndex [ 3 0 1 1 ] -first((A < 0), j) [ -2147483648 1 2 -2147483648 ] -product [ 24 120 0 6 ] -count((A(j,i) > 0), j) [ 4 2 2 4 ] -any(abs(A(j,i)) > 4, j) [ 0 1 1 0 ] -all(A(j,i) > 0, j) [ 1 0 0 1 ] -@end example - -Note: the odd numbers for first() are @code{tiny(int())} i.e.@: the smallest -number representable by an int. The exact value is machine-dependent. - -@cindex Array reductions chaining -@cindex partial reductions chaining -@cindex reductions chaining - -The result of a reduction is an array expression, so reductions -can be used as operands in an array expression: - -@example -Array A; -Array B; -Array C; // ... - -secondIndex j; -thirdIndex k; - -B = sqrt(sum(sqr(A), k)); - -// Do two reductions in a row -C = sum(sum(A, k), j); -@end example - -Note that this is not allowed: - -@example -Array A; -firstIndex i; -secondIndex j; - -// Completely sum the array? -int result = sum(sum(A, j), i); -@end example - -You cannot reduce an array to zero dimensions! Instead, use one of the -global functions described in the previous section. - - -@node Where expr, , User et, Array Expressions -@section where statements -@cindex @code{where} statements -@cindex functional if (@code{where}) -@cindex @code{if} (@code{where}) - -Blitz++ provides the @code{where} function as an array expression version of the -@code{( ? : )} operator. 
The syntax is: - -@example -where(array-expr1, array-expr2, array-expr3) -@end example - -Wherever @code{array-expr1} is true, @code{array-expr2} is returned. Where -@code{array-expr1} is false, @code{array-expr3} is returned. For example, -suppose we wanted to sum the squares of only the positive elements of an -array. This can be implemented using a where function: - -@example -double posSquareSum = sum(where(A > 0, pow2(A), 0)); -@end example - diff --git a/doc/arrays-globals.texi b/doc/arrays-globals.texi deleted file mode 100644 index c3b4a425..00000000 --- a/doc/arrays-globals.texi +++ /dev/null @@ -1,166 +0,0 @@ - -@node Array globals, Array I/O, Array members, Arrays -@section Global functions - -@example -void allocateArrays(TinyVector& shape, - Array& A, - Array& B, ...); -@end example -@findex allocateArrays() - -This function will allocate interlaced arrays, but only if interlacing is -desirable for your architecture. This is controlled by the -@code{BZ_INTERLACE_ARRAYS} flag in @file{blitz/tuning.h}. You can provide up to -11 arrays as parameters. Any views currently associated with the array -objects are lost. Here is a typical use: - -@example -Array A, B, C; -allocateArrays(shape(64,64),A,B,C); -@end example - -@cindex interlacing -@cindex Array interlacing - -If array interlacing is enabled, then the arrays are stored in memory like -this: @code{A(0,0)}, @code{B(0,0)}, @code{C(0,0)}, @code{A(0,1)}, -@code{B(0,1)}, ... If interlacing is disabled, then the arrays are -allocated in the normal fashion: each array has its own block of memory. -Once interlaced arrays are allocated, they can be used just like regular -arrays. 
- -@cindex convolution, 1-D -@cindex Array convolution -@findex convolve() - -@example -#include -Array convolve(const Array& B, - const Array& C); -@end example - -This function computes the 1-D convolution of the arrays B and C: -@tex -$$ A[i] = \sum_j B[j] C[i-j] $$ -@end tex -@html -A[i] = sum(B[j] * C[i-j], j) -@end html -@ifnottex -@ifnothtml -@example -A[i] = sum(B[j] * C[i-j], j) -@end example -@end ifnothtml -@end ifnottex -If the array @math{B} has domain @math{b_l \ldots b_h}, and array @math{C} -has domain @math{c_l \ldots c_h}, then the resulting array has domain -@math{a_l \ldots a_h}, with @math{a_l = b_l + c_l} and @math{a_h = b_h + c_h}. - -A new array is allocated to contain the result. To avoid copying the result -array, you should use it as a constructor argument. For example: -@code{Array A = convolve(B,C);} The convolution is computed in the -spatial domain. Frequency-domain transforms are not used. If you are -convolving two large arrays, then this will be slower than using a Fourier -transform. - -@cindex correlation -@cindex Array correlation - -Note that if you need a cross-correlation, you can use the convolve function -with one of the arrays reversed. For example: - -@example -Array A = convolve(B,C.reverse()); -@end example - -Autocorrelation can be performed using the same approach. - -@example -void cycleArrays(Array& A, Array& B); -void cycleArrays(Array& A, Array& B, - Array& C); -void cycleArrays(Array& A, Array& B, - Array& C, Array& D); -void cycleArrays(Array& A, Array& B, - Array& C, Array& D, - Array& E); -@end example - -@findex cycleArrays() -@cindex time-stepping - -These routines are useful for time-stepping PDEs. They take a set of arrays -such as [@code{A,B,C,D}] and cyclically rotate them to [@code{B,C,D,A}]; -i.e.@: the @code{A} array then refers to what was @code{B}'s data, the -@code{B} array refers to what was @code{C}'s data, and the @code{D} array -refers to what was @code{A}'s data. 
These functions operate in constant -time, since only the handles change (i.e.@: no data is copied; only pointers -change). - -@example -void find(Array,1>& indices, - const _bz_ArrayExpr& expr); -void find(Array,1>& indices, - const Array& exprVals); -@end example - -This is an analogue to the Matlab @code{find()} method, which takes a -boolean array expression or an array of bools and returns a 1d array -of indices for all locations where the array or expression is true. - -@findex find() - -@example -Array imag(Array,N>&); -@end example - -This method returns a view of the imaginary portion of the array. - -@findex imag() - -@example -void interlaceArrays(TinyVector& shape, - Array& A, - Array& B, ...); -@end example - -This function is similar to @code{allocateArrays()} above, except that the -arrays are @strong{always} interlaced, regardless of the setting of the -@code{BZ_INTERLACE_ARRAYS} flag. - -@findex interlaceArrays() - -@example -Array real(Array,N>&); -@end example - -This method returns a view of the real portion of the array. - -@findex real() - -@example -TinyVector shape(int L); -TinyVector shape(int L, int M); -TinyVector shape(int L, int M, int N); -TinyVector shape(int L, int M, int N, int O); -... [up to 11 dimensions] -@end example - -@findex shape() - -These functions may be used to create shape parameters. They package the -set of integer arguments as a @code{TinyVector} of appropriate length. For -an example use, see @code{allocateArrays()} above. - -@example -void swap(Array& A, Array& B); -@end example - -This function swaps the storage of two arrays, just like the @code{std::swap()} -function does for STL container types. This is a synonym for the -two-argument version of @code{cycleArrays()} above. 
- -@findex swap() - diff --git a/doc/arrays-indirect.texi b/doc/arrays-indirect.texi deleted file mode 100644 index 7e254f89..00000000 --- a/doc/arrays-indirect.texi +++ /dev/null @@ -1,258 +0,0 @@ - -@cindex indirection -@cindex Array indirection - -@strong{Indirection} is the ability to modify or access an array at a set of -selected index values. Blitz++ provides several forms of indirection: - -@itemize @bullet - -@item @strong{Using a list of array positions}: this approach is useful -if you need to modify an array at a set of scattered points. - -@item @strong{Cartesian-product indirection}: as an example, for a -two-dimensional array you might have a list @code{I} of rows and a list -@code{J} of columns, and you want to modify the array at all (i,j) positions -where i is in @code{I} and j is in @code{J}. This is a @strong{cartesian -product} of the index sets @code{I} and @code{J}. - -@item @strong{Over a set of strips}: for efficiency, you can represent an -arbitrarily-shaped subset of an array as a list of one-dimensional strips. -This is a useful way of handling @strong{Regions Of Interest} (ROIs). - -@end itemize - -@center @image{indirect} -@center Three styles of indirection. @footnote{From top to bottom: (1) using a list of array positions; (2) Cartesian-product indirection; (3) using a set of strips to represent an arbitrarily-shaped subset of an array.} - -@cindex STL, for indirection - -In all cases, Blitz++ expects a Standard Template Library container. Some -useful STL containers are @code{list<>}, @code{vector<>}, @code{deque<>} and -@code{set<>}. Documentation of these classes is often provided with your -compiler, or see also the good documentation at -@uref{http://www.sgi.com/Technology/STL/}. STL containers are used because -they are widely available and provide easier manipulation of ``sets'' than -Blitz++ arrays. 
For example, you can easily expand and merge sets which are -stored in STL containers; doing this is not so easy with Blitz++ arrays, -which are designed for numerical work. - -STL containers are generally included by writing - -@example -#include // for list<> -#include // for vector<> -#include // for deque<> -#include // for set<> -@end example - -@cindex [] operator, for indirection - -The @code{[]} operator is overloaded on arrays so that the syntax -@code{array[container]} provides an indirect view of the array. So far, -this indirect view may only be used as an lvalue (i.e.@: on the left-hand side -of an assignment statement). - -The examples in the next sections are available in the Blitz++ distribution -in @file{}. - -@node Indirection position list, Indirection Cartesian product, , Indirection -@section Indirection using lists of array positions - -@cindex Array indirection list of positions -@cindex indirection list of positions - -The simplest kind of indirection uses a list of points. For one-dimensional -arrays, you can just use an STL container of integers. Example: - -@example - Array A(5), B(5); - A = 0; - B = 1, 2, 3, 4, 5; - - vector I; - I.push_back(2); - I.push_back(4); - I.push_back(1); - - A[I] = B; -@end example - -After this code, the array A contains @code{[ 0 2 3 0 5 ]}. - -Note that arrays on the right-hand-side of the assignment must have the same -shape as the array on the left-hand-side (before indirection). In the -statement @code{A[I] = B}, A and B must have the same shape, not I and B. - -For multidimensional arrays, you can use an STL container of -@code{TinyVector} objects. 
Example: - -@example - Array A(4,4), B(4,4); - A = 0; - B = 10*tensor::i + tensor::j; - - typedef TinyVector coord; - - list I; - I.push_back(coord(1,1)); - I.push_back(coord(2,2)); - - A[I] = B; -@end example - -After this code, the array A contains: - -@example - 0 0 0 0 - 0 11 0 0 - 0 0 22 0 - 0 0 0 0 -@end example - -(The @code{tensor::i} notation is explained in the section on index -placeholders @ref{Index placeholders}). - -@node Indirection Cartesian product, Indirection strip list, Indirection position list, Indirection -@section Cartesian-product indirection - -@cindex Array indirection Cartesian-product -@cindex indirection Cartesian-product - -The Cartesian product of the sets I, J and K is the set of (i,j,k) tuples -for which i is in I, j is in J, and k is in K. - -Blitz++ implements cartesian-product indirection using an @strong{adaptor} -which takes a set of STL containers and iterates through their Cartesian -product. Note that the cartesian product is never explicitly created. You -create the Cartesian-product adaptor by calling the function: - -@example -template -indexSet(T_container& c1, T_container& c2, ...) -@end example - -The returned adaptor can then be used in the @code{[]} operator of an array -object. - -Here is a two-dimensional example: - -@cindex rank-1 update - -@example - Array A(6,6), B(6,6); - A = 0; - B = 10*tensor::i + tensor::j; - - vector I, J; - I.push_back(1); - I.push_back(2); - I.push_back(4); - - J.push_back(0); - J.push_back(2); - J.push_back(5); - - A[indexSet(I,J)] = B; -@end example - -After this code, the A array contains: - -@example - 0 0 0 0 0 0 -10 0 12 0 0 15 -20 0 22 0 0 25 - 0 0 0 0 0 0 -40 0 42 0 0 45 - 0 0 0 0 0 0 -@end example - -All the containers used in a cartesian product must be the same type (e.g. -all @code{vector} or all @code{set >}), but they may -be different sizes. Singleton containers (containers containing a single -value) are fine. 
- -@node Indirection strip list, , Indirection Cartesian product, Indirection -@section Indirection with lists of strips - -@cindex Array indirection list of strips -@cindex indirection list of strips - -You can also do indirection with a container of one-dimensional -@strong{strips}. This is useful when you want to manipulate some -arbitrarily-shaped, well-connected subdomain of an array. By representing -the subdomain as a list of strips, you allow Blitz++ to operate on vectors, -rather than scattered points; this is much more efficient. - -@findex RectDomain - -Strips are represented by objects of type @code{RectDomain}, where -@code{N} is the dimensionality of the array. The @code{RectDomain} class -can be used to represent any rectangular subdomain, but for indirection it -is only used to represent strips. - -You create a strip by using this function: - -@findex strip() - -@example -RectDomain strip(TinyVector start, - int stripDimension, int ubound); -@end example - -The @code{start} parameter is where the strip starts; @code{stripDimension} -is the dimension in which the strip runs; @code{ubound} is the last index -value for the strip. For example, to create a 2-dimensional strip from -(2,5) to (2,9), one would write: - -@example -TinyVector start(2,5); -RectDomain<2> myStrip = strip(start,secondDim,9); -@end example - -Here is a more substantial example which creates a list of strips -representing a circle subset of an array: - -@example - const int N = 7; - Array A(N,N), B(N,N); - typedef TinyVector coord; - - A = 0; - B = 1; - - double centre_i = (N-1)/2.0; - double centre_j = (N-1)/2.0; - double radius = 0.8 * N/2.0; - - // circle will contain a list of strips which represent a circular - // subdomain. 
- - list > circle; - for (int i=0; i < N; ++i) - @{ - double jdist2 = pow2(radius) - pow2(i-centre_i); - if (jdist2 < 0.0) - continue; - - int jdist = int(sqrt(jdist2)); - coord startPos(i, int(centre_j - jdist)); - circle.push_back(strip(startPos, secondDim, int(centre_j + jdist))); - @} - - // Set only those points in the circle subdomain to 1 - A[circle] = B; -@end example - -After this code, the A array contains: - -@example - 0 0 0 0 0 0 0 - 0 0 1 1 1 0 0 - 0 1 1 1 1 1 0 - 0 1 1 1 1 1 0 - 0 1 1 1 1 1 0 - 0 0 1 1 1 0 0 - 0 0 0 0 0 0 0 -@end example - diff --git a/doc/arrays-intro.texi b/doc/arrays-intro.texi deleted file mode 100644 index a528cde2..00000000 --- a/doc/arrays-intro.texi +++ /dev/null @@ -1,166 +0,0 @@ - -@node Array intro, Array types, , Arrays -@section Getting started -@cindex Array overview - -Currently, Blitz++ provides a single array class, called -@code{Array}. This array class provides a dynamically -allocated N-dimensional array, with reference counting, arbitrary storage -ordering, subarrays and slicing, flexible expression handling, and many -other useful features. - -@subsection Template parameters -@cindex Array template parameters - -The @code{Array} class takes two template parameters: - -@itemize @bullet -@item @code{T_numtype} -is the numeric type to be stored in the array. @code{T_numtype} can be an -integral type (@code{bool}, @code{char}, @code{unsigned char}, @code{short -int}, @code{short unsigned int}, @code{int}, @code{unsigned int}, -@code{long}, @code{unsigned long}), floating point type (@code{float}, -@code{double}, @code{long double}), complex type (@code{complex}, -@code{complex}, @code{complex}) or any user-defined -type with appropriate numeric semantics. - -@item @code{N_rank} -@cindex Array rank parameter -@cindex rank parameter of arrays -is the @strong{rank} (or dimensionality) of the array. This should be a -positive integer. 
- -@end itemize - -To use the @code{Array} class, include the header @code{} and -use the namespace @code{blitz}: - -@findex using namespace blitz -@findex namespace blitz -@cindex blitz namespace - -@example -#include - -using namespace blitz; - -Array x; // A one-dimensional array of int -Array y; // A two-dimensional array of double -. -. -Array, 12> z; // A twelve-dimensional array of complex -@end example - -When no constructor arguments are provided, the array is empty, and no -memory is allocated. To create an array which contains some data, provide -the size of the array as constructor arguments: - -@example -Array y(4,4); // A 4x4 array of double -@end example - -The contents of a newly-created array are garbage. To initialize -the array, you can write: - -@example -y = 0; -@end example - -and all the elements of the array will be set to zero. If the contents of -the array are known, you can initialize it using a comma-delimited list of -values. For example, this code excerpt sets @code{y} equal to a 4x4 -identity matrix: - -@example -y = 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, 1, 0, - 0, 0, 0, 1; -@end example - -@subsection Array types -@cindex Array types - -The @code{Array} class supports a variety of arrays: - -@itemize @bullet - -@cindex Array scalar arrays -@item Arrays of scalar types, such as @code{Array} and -@code{Array} - -@cindex Array complex arrays -@cindex complex arrays -@item Complex arrays, such as @code{Array,2>} - -@cindex Array of user-defined types -@cindex Array of TinyVector -@cindex vector field -@cindex Array of TinyMatrix -@cindex Array nested -@cindex Array nested homogeneous -@cindex nested arrays -@cindex nested arrays homogeneous -@item Arrays of user-defined types. If you have a class called -@code{Polynomial}, then @code{Array} is an array of -@code{Polynomial} objects. 
- -@cindex Array of Array -@cindex Array nested heterogeneous -@cindex nested arrays heterogeneous -@item Nested homogeneous arrays using @code{TinyVector} and -@code{TinyMatrix}, in which each element is a fixed-size vector or array. -For example, @code{Array,3>} is a three-dimensional -vector field. - -@item Nested heterogeneous arrays, such as @code{Array,1>}, in -which each element is a variable-length array. -@end itemize - -@subsection A simple example - -Here's an example program which creates two 3x3 arrays, initializes -them, and adds them: - -@smallexample -@include examples/simple.texi -@end smallexample - -and the output: - -@smallexample -@include examples/simple.out -@end smallexample - -@subsection Storage orders -@cindex Array storage order -@cindex storage orders for arrays -@cindex row major -@cindex column major -@findex fortranArray -@cindex Array row major -@cindex Array column major -@cindex Array fortran-style - -Blitz++ is very flexible about the way arrays are stored in memory. - -The default storage format is row-major, C-style arrays whose indices start -at zero. - -Fortran-style arrays can also be created. Fortran arrays are stored in -column-major order, and have indices which start at one. To create a -Fortran-style array, use this syntax: @code{Array A(3, 3, -fortranArray);} The last parameter, @code{fortranArray}, tells the -@code{Array} constructor to use a fortran-style array format. - -@code{fortranArray} is a global object which has an automatic conversion to -type @code{GeneralArrayStorage}. @code{GeneralArrayStorage} -encapsulates information about how an array is laid out in memory. By -altering the contents of a @code{GeneralArrayStorage} object, you can lay -out your arrays any way you want: the dimensions can be ordered arbitrarily -and stored in ascending or descending order, and the starting indices can be -arbitrary. - -Creating custom array storage formats is described in a later section -(@ref{Array storage}). 
- diff --git a/doc/arrays-io.texi b/doc/arrays-io.texi deleted file mode 100644 index 35b0b474..00000000 --- a/doc/arrays-io.texi +++ /dev/null @@ -1,79 +0,0 @@ - -@node Array I/O, Array storage, Array globals, Arrays -@section Inputting and Outputting Arrays - -@subsection Output formatting - -@cindex persistence -@cindex Array persistence -@cindex output formatting -@cindex Array output formatting -@cindex saving arrays -@cindex writing arrays to output streams -@cindex Array saving to output stream -@cindex Array writing to output stream - -The current version of Blitz++ includes rudimentary output formatting for -arrays. Here's an example: - -@smallexample -@include examples/output.texi -@end smallexample - -And the output: - -@smallexample -@include examples/output.out -@end smallexample - -@subsection Inputting arrays - -@cindex inputting arrays from an input stream -@cindex Array inputting from istream -@cindex restoring arrays from an input stream -@cindex Array restoring from istream - -Arrays may be restored from an istream using the @code{>>} operator. - -@strong{Caution:} you must know the dimensionality of the array being restored -from the stream. The @code{>>} operator expects an array in the same input -format as generated by the @code{<<} operator, namely: - -@cindex Array persistence format - -@itemize @bullet - -@item The size of the array, for example ``32'' for a 1-dimensional array of -32 elements, ``12 x 64 x 128'' for a 3-dimensional array of size 12x64x128. - -@item The symbol @code{'['} indicating the start of the array data - -@item The array elements, listed in memory storage order - -@item The symbol @code{']'} indicating the end of the array data - -@end itemize - -The operator prototype is: - -@example -template -istream& operator>>(istream&, Array&); -@end example - -Here is an example of saving and restoring arrays from files. You can find -this example in the Blitz++ distribution as @file{examples/io.cpp}. 
- -@smallexample -@include examples/io.texi -@end smallexample - -@strong{Caution:} The storage order and starting indices are not restored from -the input stream. If you are restoring (for example) a Fortran-style array, -you must create a Fortran-style array, and then restore it. For example, -this code restores a Fortran-style array from the standard input stream: - -@example -Array B(fortranArray); -cin >> B; -@end example diff --git a/doc/arrays-members.texi b/doc/arrays-members.texi deleted file mode 100644 index 8327e079..00000000 --- a/doc/arrays-members.texi +++ /dev/null @@ -1,555 +0,0 @@ - -@node Array members, Array globals, Array debug, Arrays -@section Member functions - -@subsection A note about dimension parameters -@cindex dimension parameters -@cindex Array dimension parameters - -Several of the member functions take a @emph{dimension parameter} which is -an integer in the range 0 .. @code{N_rank}-1. For example, the method -@code{extent(int n)} returns the extent (or length) of the array in -dimension @code{n}. - -These parameters are problematic: - -@itemize @bullet - -@item They make the code cryptic. Someone unfamiliar with the -@code{reverse()} member function won't stand a chance of understanding what -@code{A.reverse(2)} does. - -@item Some users are used to dimensions being 1 .. @code{N_rank}, rather -than 0 .. @code{N_rank}-1. This makes dimension numbers inherently -error-prone. Even though I'm an experienced C/C++ programmer, I @emph{still} -want to think of the first dimension as 1 -- it doesn't make sense to talk -about the ``zeroth'' dimension. - -@end itemize - -As a solution to this problem, Blitz++ provides a series of symbolic -constants which you can use to refer to dimensions: - -@findex firstDim -@findex secondDim -@findex thirdDim -@findex fourthDim - -@example -const int firstDim = 0; -const int secondDim = 1; -const int thirdDim = 2; - . - .
-const int eleventhDim = 10; -@end example - -These symbols should be used in place of the numerals 0, 1, ... @code{N_rank}-1. -For example: - -@example -A.reverse(thirdDim); -@end example - -This code is clearer: you can see that the parameter refers to a dimension, -and it isn't much of a leap to realize that it's reversing the element -ordering in the third dimension. - -If you find @code{firstDim}, @code{secondDim}, ... aesthetically unpleasing, -there are equivalent symbols @code{firstRank}, @code{secondRank}, -@code{thirdRank}, ..., @code{eleventhRank}. - -@cindex eleven, end of the universe at - -@unnumberedsubsubsec Why stop at eleven? - -The symbols had to stop somewhere, and eleven seemed an appropriate place to -stop. Besides, if you're working in more than eleven dimensions your code -is going to be confusing no matter what help Blitz++ provides. - -@cindex Array member functions -@subsection Member function descriptions -@cindex Array member functions @code{base()} -@findex base() -@example -const TinyVector& base() const; -int base(int dimension) const; -@end example - -The @emph{base} of a dimension is the first valid index value. A typical -C-style array will have base of zero; a Fortran-style array will have base -of one. The base can be different for each dimension, but only if you -deliberately use a Range-argument constructor or design a custom storage -ordering. - -The first version returns a reference to the vector of base values. -The second version returns the base for just one dimension; it's -equivalent to the @code{lbound()} member function. See the -note on dimension parameters such as @code{firstDim} above. 
- -@cindex iterators for arrays -@findex const_iterator -@cindex Array iterators -@cindex Array member functions @code{begin()} -@cindex STL iterators for arrays -@findex begin() - -@example -Array::iterator begin(); -Array::const_iterator begin() const; -@end example - -These functions return STL-style forward and input iterators, respectively, -positioned at the first element of the array. Note that the array data is -traversed in memory order (i.e.@: by rows for C-style arrays, and by columns -for Fortran-style arrays). The @code{Array::const_iterator} has these -methods: - -@example -const_iterator(const Array&); -T operator*() const; -const T* [restrict] operator->() const; -const_iterator& operator++(); -void operator++(int); -bool operator==(const const_iterator&) const; -bool operator!=(const const_iterator&) const; -const TinyVector& position() const; -@end example - -Note that postfix ++ returns void (this is not STL-compliant, but is done -for efficiency). The method @code{position()} returns a vector containing -current index positions of the iterator. The @code{Array::iterator} -has the same methods as @code{const_iterator}, with these exceptions: -@code{iterator& operator++(); T& operator*(); T* [restrict] operator->();} -The @code{iterator} type may be used to modify array elements. To obtain an -iterator positioned at the end of the array, use the @code{end()} methods. - -@cindex Array member functions @code{cols()} -@cindex Array member functions @code{columns()} -@findex cols() -@findex columns() -@example -int cols() const; -int columns() const; -@end example - -Both of these functions return the extent of the array in the -second dimension. Equivalent to @code{extent(secondDim)}. -See also @code{rows()} and @code{depth()}.
- -@cindex Array member functions @code{copy()} -@cindex Array copying -@findex copy() -@example -Array copy() const; -@end example - -This method creates a copy of the array's data, using the same storage -ordering as the current array. The returned array is guaranteed to be -stored contiguously in memory, and to be the only object referring to its -memory block (i.e.@: the data isn't shared with any other array object). - -@cindex Array getting pointer to array data -@findex data() -@cindex Array member functions @code{data()} -@findex dataZero() -@cindex Array member functions @code{dataZero()} -@findex dataFirst() -@cindex Array member functions @code{dataFirst()} -@example -const T_numtype* [restrict] data() const; - T_numtype* [restrict] data(); -const T_numtype* [restrict] dataZero() const; - T_numtype* [restrict] dataZero(); -const T_numtype* [restrict] dataFirst() const; - T_numtype* [restrict] dataFirst(); -@end example - -These member functions all return pointers to the array data. The NCEG -@code{restrict} qualifier is used only if your compiler supports it. If -you're working with the default storage order (C-style arrays with base -zero), you'll only need to use @code{data()}. Otherwise, things get -complicated: - -@code{data()} returns a pointer to the element whose indices are equal to -the array base. With a C-style array, this means the element (0,0,...,0); -with a Fortran-style array, this means the element (1,1,...,1). If @code{A} -is an array object, @code{A.data()} is equivalent to (&A(A.base(firstDim), -A.base(secondDim), ...)). If any of the dimensions are stored in reverse -order, @code{data()} will not refer to the element which comes first in -memory. - -@code{dataZero()} returns a pointer to the element (0,0,...,0), even if such -an element does not exist in the array. What's the point of having such a -pointer? Say you want to access the element (i,j,k). 
If you add to the -pointer the dot product of (i,j,k) with the stride vector -(@code{A.stride()}), you get a pointer to the element (i,j,k). - -@code{dataFirst()} returns a pointer to the element of the array which comes -first in memory. Note however, that under some circumstances (e.g. -subarrays), the data will not be stored contiguously in memory. You have to -be very careful when meddling directly with an array's data. - -Other relevant functions are: @code{isStorageContiguous()} and -@code{zeroOffset()}. - -@cindex Array member functions @code{depth()} -@findex depth() -@example -int depth() const; -@end example - -Returns the extent of the array in the third dimension. This function is -equivalent to @code{extent(thirdDim)}. See also @code{rows()} and -@code{columns()}. - -@findex dimensions() -@cindex Array member functions @code{dimensions()} -@example -int dimensions() const; -@end example - -Returns the number of dimensions (rank) of the array. The return value is -the second template parameter (@code{N_rank}) of the @code{Array} object. -Same as @code{rank()}. - -@findex domain() -@cindex Array member functions @code{domain()} -@cindex Array obtaining domain of -@example -RectDomain domain() const; -@end example - -Returns the domain of the array. The domain consists of a vector of lower -bounds and a vector of upper bounds for the indices. NEEDS_WORK-- need a -section to explain methods of @code{RectDomain}. - -@findex end() -@cindex Array member functions @code{end()} -@example -Array::iterator end(); -Array::const_iterator end() const; -@end example - -Returns STL-style forward and input iterators (respectively) for the array, -positioned at the end of the array. - -@findex extent() -@cindex Array member functions @code{extent()} -@example -int extent(int dimension) const; -@end example - -Returns the extent (length) of the array in the specified -dimension.
See the note about dimension parameters such as @code{firstDim} -in the previous section. - -@findex extractComponent() -@cindex Array member functions @code{extractComponent()} -@cindex Array extracting components -@example -Array extractComponent(T_numtype2, - int componentNumber, int numComponents); -@end example - -This method returns an array view of a single component of a multicomponent -array. In a multicomponent array, each element is a tuple of fixed size. -The components are numbered 0, 1, ..., @code{numComponents-1}. Example: - -@example -Array,2> A(128,128); // A 128x128 array of int[3] - -Array B = A.extractComponent(int(), 1, 3); -@end example - -Now the B array refers to the 2nd component of every element in A. Note: -for complex arrays, special global functions @code{real(A)} and -@code{imag(A)} are provided to obtain real and imaginary components of an -array. See the @strong{Global Functions} section. - -@findex free() -@cindex Array member functions @code{free()} -@cindex Array freeing an -@example -void free(); -@end example - -This method resizes an array to zero size. If the array data is not being -shared with another array object, then it is freed. - -@findex isMajorRank() -@cindex Array member functions @code{isMajorRank()} -@example -bool isMajorRank(int dimension) const; -@end example - -Returns true if the dimension has the largest stride. For C-style arrays -(the default), the first dimension always has the largest stride. For -Fortran-style arrays, the last dimension has the largest stride. See also -@code{isMinorRank()} below and the note about dimension parameters such as -@code{firstDim} in the previous section. - -@findex isMinorRank() -@cindex Array member functions @code{isMinorRank()} -@example -bool isMinorRank(int dimension) const; -@end example - -Returns true if the dimension @emph{does not} have the largest stride. See -also @code{isMajorRank()}. 
- -@findex isRankStoredAscending() -@cindex Array member functions @code{isRankStoredAscending()} -@example -bool isRankStoredAscending(int dimension) const; -@end example - -Returns true if the dimension is stored in ascending order in memory. This -is the default. It will only return false if you have reversed a dimension -using @code{reverse()} or have created a custom storage order with a -descending dimension. - -@findex isStorageContiguous() -@cindex Array member functions @code{isStorageContiguous()} -@example -bool isStorageContiguous() const; -@end example - -Returns true if the array data is stored contiguously in memory. If you -slice the array or work on subarrays, there can be skips -- the array data -is interspersed with other data not part of the array. See also the various -@code{data..()} functions. If you need to ensure that the storage is -contiguous, try @code{reference(copy())}. - -@findex lbound() -@cindex Array member functions @code{lbound()} -@example -int lbound(int dimension) const; -TinyVector lbound() const; -@end example - -The first version returns the lower bound of the valid index range for a -dimension. The second version returns a vector of lower bounds for all -dimensions. The lower bound is the first valid index value. If you're -using a C-style array (the default), the lbound will be zero; Fortran-style -arrays have lbound equal to one. The lbound can be different for each -dimension, but only if you deliberately set them that way using a Range -constructor or a custom storage ordering. This function is equivalent to -@code{base(dimension)}. See the note about dimension parameters such as -@code{firstDim} in the previous section. 
- - -@findex makeUnique() -@cindex Array member functions @code{makeUnique()} -@cindex Array making unique copy -@example -void makeUnique(); -@end example - -If the array's data is being shared with another Blitz++ array object, this -member function creates a copy so the array object has a unique view of the -data. - -@findex numElements() -@cindex Array member functions @code{numElements()} -@cindex Array number of elements in -@example -int numElements() const; -@end example - -Returns the total number of elements in the array, calculated by taking the -product of the extent in each dimension. Same as @code{size()}. - -@findex ordering() -@cindex Array member functions @code{ordering()} -@cindex Array storage ordering of -@example -const TinyVector& ordering() const; -int ordering(int storageRankIndex) const; -@end example - -These member functions return information about how the data is ordered in -memory. The first version returns the complete ordering vector; the second -version returns a single element from the ordering vector. The argument for -the second version must be in the range 0 .. @code{N_rank}-1. The ordering -vector is a list of dimensions in increasing order of stride; -@code{ordering(0)} will return the dimension number with the smallest -stride, and @code{ordering(N_rank-1)} will return the dimension number with -largest stride. For a C-style array, the ordering vector contains the -elements (@code{N_rank}-1, @code{N_rank}-2, ..., 0). For a Fortran-style -array, the ordering vector is (0, 1, ..., @code{N_rank}-1). See also the -description of custom storage orders in section @ref{Array storage}. - -@findex rank() -@cindex Array member functions @code{rank()} -@example -int rank() const; -@end example - -Returns the rank (number of dimensions) of the array. The return value is -equal to @code{N_rank}. Equivalent to @code{dimensions()}. 
- -@findex reference() -@cindex Array member functions @code{reference()} -@cindex Array referencing another -@example -void reference(Array& A); -@end example - -This causes the array to adopt another array's data as its own. After this -member function is used, the array object and the array @code{A} are -indistinguishable -- they have identical sizes, index ranges, and data. The -data is shared between the two arrays. - -@findex reindex(), reindexSelf() -@cindex Array member functions @code{reindex()} -@cindex Array member functions @code{reindexSelf()} -@cindex Array reindexing -@example -void reindexSelf(const TinyVector&); -Array reindex(const TinyVector&); -@end example - -These methods reindex an array to use a new base vector. The first version -reindexes the array, and the second just returns a reindexed view of the -array, leaving the original array unmodified. - -@findex resize() -@cindex Array member functions @code{resize()} -@cindex Array resizing -@example -void resize(int extent1, ...); -void resize(const TinyVector&); -@end example - -These functions resize an array to the specified size. If the array is -already the size specified, then no memory is allocated. After resizing, -the contents of the array are garbage. See also @code{resizeAndPreserve()}. - -@findex resizeAndPreserve() -@cindex Array member functions @code{resizeAndPreserve()} -@example -void resizeAndPreserve(int extent1, ...); -void resizeAndPreserve(const TinyVector&); -@end example - -These functions resize an array to the specified size. If the array is -already the size specified, then no change occurs (the array is not -reallocated and copied). The contents of the array are preserved whenever -possible; if the new array size is smaller, then some data will be lost. -Any new elements created by resizing the array are left uninitialized. 
- -@findex reverse(), reverseSelf() -@cindex Array member functions @code{reverse()} -@cindex Array member functions @code{reverseSelf()} -@cindex Array reversing -@example -Array reverse(int dimension); -void reverseSelf(int dimension); -@end example - -This method reverses the array in the specified dimension. For example, if -@code{reverse(firstDim)} is invoked on a 2-dimensional array, then the -ordering of rows in the array will be reversed; @code{reverse(secondDim)} -would reverse the order of the columns. Note that this is implemented by -twiddling the strides of the array, and doesn't cause any data copying. The -first version returns a reversed ``view'' of the array data; the second -version applies the reversal to the array itself. - -@findex rows() -@cindex Array member functions @code{rows()} -@example -int rows() const; -@end example - -Returns the extent (length) of the array in the first dimension. This -function is equivalent to @code{extent(firstDim)}. See also -@code{columns()}, and @code{depth()}. - -@findex size() -@cindex Array member functions @code{size()} -@example -int size() const; -@end example - -Returns the total number of elements in the array, calculated by taking the -product of the extent in each dimension. Same as @code{numElements()}. - -@cindex @code{shape()} (Array method) -@cindex Array member functions @code{shape()} -@cindex Array shape of -@example -const TinyVector& shape() const; -@end example - -Returns the vector of extents (lengths) of the array. - -@findex stride() -@cindex Array member functions @code{stride()} -@cindex Array strides of -@example -const TinyVector& stride() const; -int stride(int dimension) const; -@end example - -The first version returns the stride vector; the second version returns the -stride associated with a dimension. A stride is the distance between -pointers to two array elements which are adjacent in a dimension. 
For -example, @code{A.stride(firstDim)} is equal to @code{&A(1,0,0) - &A(0,0,0)}. -The stride for the second dimension, @code{A.stride(secondDim)}, is equal to -@code{&A(0,1,0) - &A(0,0,0)}, and so on. For more information about -strides, see the description of custom storage formats in Section -@ref{Array storage}. See also the description of parameters like -@code{firstDim} and @code{secondDim} in the previous section. - -@cindex Array member functions @code{transpose()} -@cindex Array member functions @code{transposeSelf()} -@cindex Array transposing -@cindex transposing arrays -@findex transpose(), transposeSelf() -@example -Array transpose(int dimension1, - int dimension2, ...); -void transposeSelf(int dimension1, - int dimension2, ...); -@end example - -These methods permute the dimensions of the array. The dimensions of the -array are reordered so that the first dimension is @code{dimension1}, the -second is @code{dimension2}, and so on. The arguments should be a -permutation of the symbols @code{firstDim, secondDim, ...}. Note that this -is implemented by twiddling the strides of the array, and doesn't cause any -data copying. The first version returns a transposed ``view'' of the array -data; the second version transposes the array itself. - -@cindex Array member functions @code{ubound()} -@findex ubound() -@example -int ubound(int dimension) const; -TinyVector ubound() const; -@end example - -The first version returns the upper bound of the valid index range for a -dimension. The second version returns a vector of upper bounds for all -dimensions. The upper bound is the last valid index value. If you're using -a C-style array (the default), the ubound will be equal to the -@code{extent(dimension)-1}. Fortran-style arrays will have ubound equal to -@code{extent(dimension)}. The ubound can be different for each dimension. -The return value of @code{ubound(dimension)} will always be equal to -@code{lbound(dimension)+extent(dimension)-1}. 
See the note about -dimension parameters such as @code{firstDim} in the previous section. - -@findex zeroOffset() -@cindex Array member functions @code{zeroOffset()} -@example -int zeroOffset() const; -@end example - -This function has to do with the storage of arrays in memory. You may want -to refer to the description of the @code{data..()} member functions and of -custom storage orders in Section @ref{Array storage} for -clarification. The return value of @code{zeroOffset()} is the distance from -the first element in the array to the (possibly nonexistent) element -@code{(0,0,...,0)}. In this context, ``first element'' refers to the element -@code{(base(firstDim),base(secondDim),...)}. - diff --git a/doc/arrays-multi.texi b/doc/arrays-multi.texi deleted file mode 100644 index 5f085641..00000000 --- a/doc/arrays-multi.texi +++ /dev/null @@ -1,193 +0,0 @@ - -@node Array multi, Array usertype, , Customized Arrays -@section Multicomponent and complex arrays -@cindex Array multicomponent -@cindex multicomponent arrays - -Multicomponent arrays have elements which are vectors. Examples of such -arrays are vector fields, colour images (which contain, say, RGB tuples), -and multispectral images. Complex-valued arrays can also be regarded as -multicomponent arrays, since each element is a 2-tuple of real values. - -Here are some examples of multicomponent arrays: - -@cindex RGB24 example - -@example -// A 3-dimensional array; each element is a length 3 vector of float -Array,3> A; - -// A complex 2-dimensional array -Array,2> B; - -// A 2-dimensional image containing RGB tuples -struct RGB24 @{ - unsigned char r, g, b; -@}; - -Array C; -@end example - -@subsection Extracting components - -@cindex extracting components -@cindex Array extracting components - -Blitz++ provides some special support for such arrays. The most important -is the ability to extract a single component.
For example: - -@example -Array,2> A(128,128); -Array B = A.extractComponent(float(), 1, 3); -B = 0; -@end example - -The call to @code{extractComponent} returns an array of floats; this array -is a view of the second component of each element of A. The arguments of -@code{extractComponent} are: (1) the type of the component (in this example, -float); (2) the component number to extract (numbered 0, 1, ... N-1); and -(3) the number of components in each element. - -This is a little bit messy, so Blitz++ provides a handy shortcut using -@code{operator[]}: - -@example -Array,2> A(128,128); -A[1] = 0; -@end example - -The number inside the square brackets is the component number. However, for -this operation to work, Blitz++ has to already know how many components -there are, and what type they are. It knows this already for -@code{TinyVector} and @code{complex}. If you use your own type, though, -you will have to tell Blitz++ this information using the macro -@code{BZ_DECLARE_MULTICOMPONENT_TYPE()}. This macro has three arguments: - -@findex BZ_DECLARE_MULTICOMPONENT_TYPE - -@example -BZ_DECLARE_MULTICOMPONENT_TYPE(T_element, T_componentType, numComponents) -@end example - -@code{T_element} is the element type of the array. @code{T_componentType} -is the type of the components of that element. @code{numComponents} is the -number of components in each element. - -An example will clarify this. Suppose we wanted to make a colour image, -stored in 24-bit HSV (hue-saturation-value) format. We can make a class -@code{HSV24} which represents a single pixel: - -@cindex HSV24 example - -@example -#include - -using namespace blitz; - -class HSV24 @{ -public: - // These constants will make the code below cleaner; we can - // refer to the components by name, rather than number.
- - static const int hue=0, saturation=1, value=2; - - HSV24() @{ @} - HSV24(int hue, int saturation, int value) - : h_(hue), s_(saturation), v_(value) - @{ @} - - // Some other stuff here, obviously - -private: - unsigned char h_, s_, v_; -@}; -@end example - -Right after the class declaration, we will invoke the macro -@code{BZ_DECLARE_MULTICOMPONENT_TYPE} to tell Blitz++ about HSV24: - -@example -// HSV24 has 3 components of type unsigned char -BZ_DECLARE_MULTICOMPONENT_TYPE(HSV24, unsigned char, 3); -@end example - -Now we can create HSV images and modify the individual components: - -@example -int main() -@{ - Array A(128,128); // A 128x128 HSV image - ... - - // Extract a greyscale version of the image - Array A_greyscale = A[HSV24::value]; - - // Bump up the saturation component to get a - // pastel effect - A[HSV24::saturation] *= 1.3; - - // Brighten up the middle of the image - Range middle(32,96); - A[HSV24::value](middle,middle) *= 1.2; -@} -@end example - -@subsection Special support for complex arrays - -@cindex Array complex -@cindex complex arrays - -Since complex arrays are used frequently, Blitz++ provides two special -methods for getting the real and imaginary components: - -@example -Array,2> A(32,32); - -real(A) = 1.0; -imag(A) = 0.0; -@end example - -The function @code{real(A)} returns an array view of the real component; -@code{imag(A)} returns a view of the imaginary component. - -Note: Blitz++ provides numerous math functions defined over complex-valued -arrays, such as @code{conj}, @code{polar}, @code{arg}, @code{abs}, -@code{cos}, @code{pow}, etc. See the section on math functions -(@ref{Math functions 1}) for details. - -@subsection Zipping together expressions -@cindex zipping expressions -@cindex Array zipping expressions - -Blitz++ provides a function @code{zip()} which lets you combine two or more -expressions into a single component. 
For example, you can combine two real -expressions into a complex expression, or three integer expressions into an -HSV24 expression. The function has this syntax: - -@example -resultexpr zip(expr1, expr2, T_element) -resultexpr zip(expr1, expr2, expr3, T_element) ** not available yet -resultexpr zip(expr1, expr2, expr3, expr4, T_element) ** not available yet -@end example - -The types @code{resultexpr}, @code{expr1} and @code{expr2} are array -expressions. The third argument is the type you want to create. For -example: - -@example -int N = 16; -Array,1> A(N); -Array theta(N); - - ... - -A = zip(cos(theta), sin(theta), complex()); -@end example - -The above line is equivalent to: - -@example -for (int i=0; i < N; ++i) - A[i] = complex(cos(theta[i]), sin(theta[i])); -@end example - diff --git a/doc/arrays-slicing.texi b/doc/arrays-slicing.texi deleted file mode 100644 index dcf8658c..00000000 --- a/doc/arrays-slicing.texi +++ /dev/null @@ -1,300 +0,0 @@ - -@node Array slicing, Array debug, Array ctors, Arrays -@section Indexing, subarrays, and slicing - -This section describes how to access the elements of an array. There are -three main ways: - -@itemize @bullet - -@item @strong{Indexing} obtains a single element - -@item Creating a @strong{subarray} which refers to a smaller portion of -an array - -@item @strong{Slicing} to produce a smaller-dimensional view of a portion -of an array - -@end itemize - -Indexing, subarrays and slicing all use the overloaded parenthesis -@code{operator()}. - -As a running example, we'll consider the three dimensional array pictured -below, which has index ranges (0..7, 0..7, 0..7). Shaded portions of the -array show regions which have been obtained by indexing, creating a -subarray, and slicing. - -@center @image{slice} -@center Examples of array indexing, subarrays, and slicing. - -@subsection Indexing -@cindex Array indexing -@cindex indexing an array - -There are two ways to get a single element from an array. 
The simplest is -to provide a set of integer operands to @code{operator()}: - -@example -A(7,0,0) = 5; -cout << "A(7,0,0) = " << A(7,0,0) << endl; -@end example - -This version of indexing is available for arrays of rank one through eleven. -If the array object isn't @code{const}, the return type of -@code{operator()} is a reference; if the array object is @code{const}, the -return type is a value. - -You can also get an element by providing an operand of type -@code{TinyVector} where @code{N_rank} is the rank of the array -object: - -@example -TinyVector index; -index = 7, 0, 0; -A(index) = 5; -cout << "A(7,0,0) = " << A(index) << endl; -@end example - -This version of @code{operator()} is also available in a const-overloaded -version. - -It's possible to use fewer than @code{N_rank} indices. However, missing -indices are @strong{assumed to be zero}, which will cause bounds errors if -the valid index range does not include zero (e.g. Fortran arrays). For this -reason, and for code clarity, it's a bad idea to omit indices. - -@subsection Subarrays -@cindex Array subarrays -@cindex subarrays -@cindex Range objects - -You can obtain a subarray by providing @code{Range} operands to -@code{operator()}. A @code{Range} object represents a set of regularly -spaced index values. For example, - -@example -Array B = A(Range(5,7), Range(5,7), Range(0,2)); -@end example - -The object B now refers to elements (5..7,5..7,0..2) of the array A. - -The returned subarray is of type @code{Array}. This means -that subarrays can be used wherever arrays can be: in expressions, as -lvalues, etc. 
Some examples: - -@example -// A three-dimensional stencil (used in solving PDEs) -Range I(1,6), J(1,6), K(1,6); -B = (A(I,J,K) + A(I+1,J,K) + A(I-1,J,K) + A(I,J+1,K) - + A(I,J-1,K) + A(I,J,K+1) + A(I,J,K-1)) / 7.0; - -// Set a subarray of A to zero -A(Range(5,7), Range(5,7), Range(5,7)) = 0.; -@end example - -The bases of the subarray are equal to the bases of the original array: - -@example -Array D(Range(1,5), Range(1,5)); // 1..5, 1..5 -Array E = D(Range(2,3), Range(2,3)); // 1..2, 1..2 -@end example - -An array can be used on both sides of an expression only if the subarrays -don't overlap. If the arrays overlap, the result may depend on the order in -which the array is traversed. - -@subsection RectDomain and StridedDomain -@cindex RectDomain -@findex RectDomain -@cindex StridedDomain -@findex StridedDomain -@cindex TinyVector of Range (use @code{RectDomain}) - -The classes @code{RectDomain} and @code{StridedDomain}, defined in -@code{blitz/domain.h}, offer a dimension-independent notation for subarrays. - -@code{RectDomain} and @code{StridedDomain} can be thought of as a -@code{TinyVector}. Both have a vector of lower- and upper-bounds; -@code{StridedDomain} has a stride vector. For example, the subarray: - -@example -Array B = A(Range(4,7), Range(8,11)); // 4..7, 8..11 -@end example - -could be obtained using @code{RectDomain} this way: - -@example -TinyVector lowerBounds(4, 8); -TinyVector upperBounds(7, 11); -RectDomain<2> subdomain(lowerBounds, upperBounds); - -Array B = A(subdomain); -@end example - -Here are the prototypes of @code{RectDomain} and @code{StridedDomain}.
- -@example -template -class RectDomain @{ - -public: - RectDomain(const TinyVector& lbound, - const TinyVector& ubound); - - const TinyVector& lbound() const; - int lbound(int i) const; - const TinyVector& ubound() const; - int ubound(int i) const; - Range operator[](int rank) const; - void shrink(int amount); - void shrink(int dim, int amount); - void expand(int amount); - void expand(int dim, int amount); -@}; - -template -class StridedDomain @{ - -public: - StridedDomain(const TinyVector& lbound, - const TinyVector& ubound, - const TinyVector& stride); - - const TinyVector& lbound() const; - int lbound(int i) const; - const TinyVector& ubound() const; - int ubound(int i) const; - const TinyVector& stride() const; - int stride(int i) const; - Range operator[](int rank) const; - void shrink(int amount); - void shrink(int dim, int amount); - void expand(int amount); - void expand(int dim, int amount); -@}; -@end example - -@subsection Slicing -@cindex Array slicing -@cindex slicing arrays - -A combination of integer and Range operands produces a @strong{slice}. Each -integer operand reduces the rank of the array by one. For example: - -@example -Array F = A(Range::all(), 2, Range::all()); -Array G = A(2, 7, Range::all()); -@end example - -Range and integer operands can be used in any combination, for arrays -up to rank 11. - -@strong{Caution:} Using a combination of integer and Range operands requires a -newer language feature (partial ordering of member templates) which not all -compilers support. If your compiler does provide this feature, -@code{BZ_PARTIAL_ORDERING} will be defined in @code{}. If -not, you can use this workaround: - -@example -Array F = A(Range::all(), Range(2,2), Range::all()); -Array G = A(Range(2,2), Range(7,7), Range::all()); -@end example - -@subsection More about Range objects -@cindex Range objects - -A @code{Range} object represents an ordered set of uniformly spaced -integers. 
Here are some examples of using Range objects to obtain -subarrays: - -@smallexample -@include examples/range.texi -@end smallexample - -The optional third constructor argument specifies a stride. For example, -@code{Range(1,5,2)} refers to elements [1 3 5]. Strides can also be -negative: @code{Range(5,1,-2)} refers to elements [5 3 1]. - -Note that if you use the same Range frequently, you can just construct one -object and use it multiple times. For example: - -@example -Range all = Range::all(); -A(0,all,all) = A(N-1,all,all); -A(all,0,all) = A(all,N-1,all); -A(all,all,0) = A(all,all,N-1); -@end example - -Here's an example of using strides with a two-dimensional -array: - -@smallexample -@include examples/strideslice.texi -@end smallexample - -Here's an illustration of the @code{B} subarray: - -@center @image{strideslice} -@center Using strides to create non-contiguous subarrays. - -And the program output: - -@smallexample -@include examples/strideslice.out -@end smallexample - -@subsection A note about assignment -@cindex Array =, meaning of -@cindex =, meaning of -@cindex shallow copies, see also reference() -@cindex assignment operator - -The assignment operator (@code{=}) always results in the expression on the -right-hand side (rhs) being @emph{copied} to the lhs (i.e.@: the data on the -lhs is overwritten with the result from the rhs). This is different from -some array packages in which the assignment operator makes the lhs a -reference (or alias) to the rhs. To further confuse the issue, the copy -constructor for arrays @emph{does} have reference semantics. Here's an -example which should clarify things: - -@example -Array A(5), B(10); -A = B(Range(0,4)); // Statement 1 -Array C = B(Range(0,4)); // Statement 2 -@end example - -Statement 1 results in a portion of @code{B}'s data being copied into -@code{A}. After Statement 1, both @code{A} and @code{B} have their own -(nonoverlapping) blocks of data. 
Contrast this behaviour with that of -Statement 2, which is @strong{not} an assignment (it uses the copy -constructor). After Statement 2 is executed, the array @code{C} is a -reference (or alias) to @code{B}'s data. - -So to summarize: If you want to copy the rhs, use an assignment operator. -If you want to reference (or alias) the rhs, use the copy constructor (or -alternately, the @code{reference()} member function in @ref{Array members}). - -@strong{Very important:} whenever you have an assignment operator (@code{=}, -@code{+=}, @code{-=}, etc.) the lhs @strong{must} have the same shape as the -@strong{rhs}. If you want the array on the left hand side to be resized to -the proper shape, you must do so by calling the @code{resize} method, for -example: - -@example -A.resize(B.shape()); // Make A the same size as B -A = B; -@end example - -@subsection An example - -@smallexample -@include examples/slicing.texi -@end smallexample - -The output: - -@smallexample -@include examples/slicing.out -@end smallexample - diff --git a/doc/arrays-stencils.texi b/doc/arrays-stencils.texi deleted file mode 100644 index 32e44051..00000000 --- a/doc/arrays-stencils.texi +++ /dev/null @@ -1,569 +0,0 @@ -@cindex stencil objects -@cindex Array stencils - -Blitz++ provides an implementation of stencil objects which is currently -@strong{experimental}. This means that the exact details of how they are -declared and used may change in future releases. Use at your own risk. - -@section Motivation: a nicer notation for stencils - -Suppose we wanted to implement the 3-D acoustic wave equation using finite -differencing. Here is how a single iteration would look using subarray -syntax: - -@example -Range I(1,N-2), J(1,N-2), K(1,N-2); - -P3(I,J,K) = (2-6*c(I,J,K)) * P2(I,J,K) - + c(I,J,K)*(P2(I-1,J,K) + P2(I+1,J,K) + P2(I,J-1,K) + P2(I,J+1,K) - + P2(I,J,K-1) + P2(I,J,K+1)) - P1(I,J,K); -@end example - -This syntax is a bit klunky. 
With stencil objects, the implementation -becomes: - -@example -BZ_DECLARE_STENCIL4(acoustic3D_stencil,P1,P2,P3,c) - P3 = 2 * P2 + c * Laplacian3D(P2) - P1; -BZ_END_STENCIL - - . - . - -applyStencil(acoustic3D_stencil(), P1, P2, P3, c); -@end example - - -@node Stencil object, Stencil operator, , Stencils -@section Declaring stencil objects -@cindex stencil objects declaring - -A stencil declaration may not be inside a function. It can appear inside a -class declaration (in which case the stencil object is a nested type). - -Stencil objects are declared using the macros @code{BZ_DECLARE_STENCIL1}, -@code{BZ_DECLARE_STENCIL2}, etc. The number suffix is how many arrays are -involved in the stencil (in the above example, 4 arrays -- P1, P2, P3, c -- are -used, so the macro @code{BZ_DECLARE_STENCIL4} is invoked). - -The first argument is a name for the stencil object. Subsequent arguments -are names for the arrays on which the stencil operates. - -After the stencil declaration, the macro @code{BZ_END_STENCIL} must appear -(or the macro @code{BZ_END_STENCIL_WITH_SHAPE}, described in the next -section). - -In between the two macros, you can have multiple assignment statements, -if/else/elseif constructs, function calls, loops, etc. - -Here are some simple examples: - -@findex BZ_DECLARE_STENCIL - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL4(acoustic2D,P1,P2,P3,c) - P3 = 2 * P2 + c * (-4 * P2(0,0) + P2(0,1) + P2(0,-1) + P2(1,0) + P2(-1,0)) - - P1; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL8(prop2D,E1,E2,E3,M1,M2,M3,cE,cM) - E3 = 2 * E2 + cE * Laplacian2D(E2) - E1; - M3 = 2 * M2 + cM * Laplacian2D(M2) - M1; -BZ_END_STENCIL - -BZ_DECLARE_STENCIL3(smooth2Db,A,B,c) - if ((c > 0.0) && (c < 1.0)) - A = c * (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0 - + (1-c)*B; - else - A = 0; -BZ_END_STENCIL -@end example - -Currently, a stencil can take up to 11 array parameters. 
- -You can use the notation @code{A(i,j,k)} to read the element at an offset -@code{(i,j,k)} from the current element. If you omit the parentheses -(i.e.@: as in ``@code{A}''), then the current element is read. - -You can invoke @emph{stencil operators} which calculate finite differences -and laplacians. - -@section Automatic determination of stencil extent - -In stencil declarations such as - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL -@end example - -Blitz++ will try to automatically determine the spatial extent of the -stencil. This will usually work for stencils defined on integer or float -arrays. However, the mechanism does not work well for complex-valued -arrays, or arrays of user-defined types. If you get a peculiar error when -you try to use a stencil, you probably need to tell Blitz++ the spatial -extent of the stencil manually. - -You do this by ending a stencil declaration with -@code{BZ_END_STENCIL_WITH_SHAPE}: - -@example -BZ_DECLARE_STENCIL2(smooth2D,A,B) - A = (B(0,0) + B(0,1) + B(0,-1) + B(1,0) + B(-1,0)) / 5.0; -BZ_END_STENCIL_WITH_SHAPE(shape(-1,-1),shape(+1,+1)) -@end example - -The parameters of this macro are: a @code{TinyVector} (constructed by the -@code{shape()} function) containing the lower bounds of the stencil offsets, -and a @code{TinyVector} containing the upper bounds. You can determine these -by looking at the terms in the stencil and finding the minimum and -maximum value of each index: - -@example - A = (B(0, 0) - + B(0, +1) - + B(0, -1) - + B(+1, 0) - + B(-1, 0)) / 5.0; - -------- -min indices -1, -1 -max indices +1, +1 -@end example - -@node Stencil operator, Stencil customize, Stencil object, Stencils -@section Stencil operators -@cindex stencil operators - -This section lists all the stencil operators provided by Blitz++. They -assume that an array represents evenly spaced data points separated by a -distance of @code{h}. 
A 2nd-order accurate operator has error term -@math{O(h^2)}; a 4th-order accurate operator has error term @math{O(h^4)}. - -All of the stencils have factors associated with them. For example, the -@code{central12} operator is a discrete first derivative which is 2nd-order -accurate. Its factor is 2h; this means that to get the first derivative of -an array A, you need to use @code{central12(A,firstDim)}@math{/(2h)}. -Typically when designing stencils, one factors out all of the @math{h} terms -for efficiency. - -The factor terms always consist of an integer multiplier (often 1) and a -power of @math{h}. For ease of use, all of the operators listed below are -provided in a second ``normalized'' version in which the integer multiplier -is 1. The normalized versions have an @code{n} appended to the name, for -example @code{central12n} is the normalized version of @code{central12}, and -has factor @math{h} instead of @math{2h}. - -These operators are defined in @code{blitz/array/stencilops.h} if you wish -to see the implementation. - -@subsection Central differences -@cindex central differences - -@table @code -@item central12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/central12.texi - -@item central22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/central22.texi - -@item central32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/central32.texi - -@item central42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/central42.texi - -@item central14(A,dimension) -1st derivative, 4th order accurate. Factor: @math{12h} -@include stencils/central14.texi - -@item central24(A,dimension) -2nd derivative, 4th order accurate. Factor: @math{12h^2} -@include stencils/central24.texi - -@item central34(A,dimension) -3rd derivative, 4th order accurate. 
Factor: @math{8h^3} -@include stencils/central34.texi - -@item central44(A,dimension) -4th derivative, 4th order accurate. Factor: @math{6h^4} -@include stencils/central44.texi -@end table - -Note that the above are available in normalized versions @code{central12n}, -@code{central22n}, ..., @code{central44n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. - -These are available in multicomponent versions: for example, -@code{central12(A,component,dimension)} gives the central12 operator for the -specified component (Components are numbered 0, 1, ... N-1). - -@subsection Forward differences -@cindex forward differences - -@table @code -@item forward11(A,dimension) -1st derivative, 1st order accurate. Factor: @math{h} -@include stencils/forward11.texi - -@item forward21(A,dimension) -2nd derivative, 1st order accurate. Factor: @math{h^2} -@include stencils/forward21.texi - -@item forward31(A,dimension) -3rd derivative, 1st order accurate. Factor: @math{h^3} -@include stencils/forward31.texi - -@item forward41(A,dimension) -4th derivative, 1st order accurate. Factor: @math{h^4} -@include stencils/forward41.texi - -@item forward12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/forward12.texi - -@item forward22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/forward22.texi - -@item forward32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/forward32.texi - -@item forward42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/forward42.texi -@end table - -Note that the above are available in normalized versions @code{forward11n}, -@code{forward21n}, ..., @code{forward42n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. 
- -These are available in multicomponent versions: for example, -@code{forward11(A,component,dimension)} gives the forward11 operator for the -specified component (Components are numbered 0, 1, ... N-1). - -@subsection Backward differences -@cindex backward differences - -@table @code -@item backward11(A,dimension) -1st derivative, 1st order accurate. Factor: @math{h} -@include stencils/backward11.texi - -@item backward21(A,dimension) -2nd derivative, 1st order accurate. Factor: @math{h^2} -@include stencils/backward21.texi - -@item backward31(A,dimension) -3rd derivative, 1st order accurate. Factor: @math{h^3} -@include stencils/backward31.texi - -@item backward41(A,dimension) -4th derivative, 1st order accurate. Factor: @math{h^4} -@include stencils/backward41.texi - -@item backward12(A,dimension) -1st derivative, 2nd order accurate. Factor: @math{2h} -@include stencils/backward12.texi - -@item backward22(A,dimension) -2nd derivative, 2nd order accurate. Factor: @math{h^2} -@include stencils/backward22.texi - -@item backward32(A,dimension) -3rd derivative, 2nd order accurate. Factor: @math{2h^3} -@include stencils/backward32.texi - -@item backward42(A,dimension) -4th derivative, 2nd order accurate. Factor: @math{h^4} -@include stencils/backward42.texi -@end table - -Note that the above are available in normalized versions @code{backward11n}, -@code{backward21n}, ..., @code{backward42n} which have factors of @math{h}, -@math{h^2}, @math{h^3}, or @math{h^4} as appropriate. - -These are available in multicomponent versions: for example, -@code{backward42(A,component,dimension)} gives the backward42 operator for -the specified component (Components are numbered 0, 1, ... N-1). - -@subsection Laplacian (@math{@nabla ^2}) operators -@cindex Laplacian operators - -@table @code -@item Laplacian2D(A) -2nd order accurate, 2-dimensional laplacian. Factor: @math{h^2} -@include stencils/Laplacian2D.texi - -@item Laplacian3D(A) -2nd order accurate, 3-dimensional laplacian. 
Factor: @math{h^2} - -@item Laplacian2D4(A) -4th order accurate, 2-dimensional laplacian. Factor: @math{12h^2} -@include stencils/Laplacian2D4.texi - -@item Laplacian3D4(A) -4th order accurate, 3-dimensional laplacian. Factor: @math{12h^2} -@end table - -Note that the above are available in normalized versions -@code{Laplacian2D4n}, @code{Laplacian3D4n} which have factors @math{h^2}. - -@subsection Gradient (@math{@nabla}) operators -@cindex gradient operators - -These return @code{TinyVector}s of the appropriate numeric type and length: - -@table @code - -@item grad2D(A) -2nd order, 2-dimensional gradient (vector of first derivatives), generated -using the central12 operator. Factor: @math{2h} - -@item grad2D4(A) -4th order, 2-dimensional gradient, using central14 operator. Factor: @math{12h} - -@item grad3D(A) -2nd order, 3-dimensional gradient, using central12 operator. Factor: @math{2h} - -@item grad3D4(A) -4th order, 3-dimensional gradient, using central14 operator. Factor: @math{12h} -@end table - -These are available in normalized versions @code{grad2Dn}, @code{grad2D4n}, -@code{grad3Dn} and @code{grad3D4n} which have factors @math{h}. - -@subsection Jacobian operators -@cindex Jacobian operators - -The Jacobian operators are defined over 3D vector fields only (e.g.@: -@code{Array<TinyVector<double,3>,3>}). They return a -@code{TinyMatrix<T,3,3>} where T is the numeric type of the vector field. - -@table @code -@item Jacobian3D(A) -2nd order, 3-dimensional Jacobian using the central12 operator. Factor: -@math{2h}. - -@item Jacobian3D4(A) -4th order, 3-dimensional Jacobian using the central14 operator. Factor: -@math{12h}. -@end table - -These are also available in normalized versions @code{Jacobian3Dn} and -@code{Jacobian3D4n} which have factors @math{h}. 
- -@subsection Grad-squared operators -@cindex Grad-squared operators - -There are also grad-squared operators, which return @code{TinyVector}s of -second derivatives: - -@table @code -@item gradSqr2D(A) -2nd order, 2-dimensional grad-squared (vector of second derivatives), -generated using the central22 operator. Factor: @math{h^2} - -@item gradSqr2D4(A) -4th order, 2-dimensional grad-squared, using central24 operator. Factor: -@math{12h^2} - -@item gradSqr3D(A) -2nd order, 3-dimensional grad-squared, using the central22 operator. -Factor: @math{h^2} - -@item gradSqr3D4(A) -4th order, 3-dimensional grad-squared, using central24 operator. Factor: -@math{12h^2} -@end table - -Note that the above are available in normalized versions @code{gradSqr2Dn}, -@code{gradSqr2D4n}, @code{gradSqr3Dn}, @code{gradSqr3D4n} which have factors -@math{h^2}. - -@subsection Curl (@math{@nabla @times}) operators -@cindex curl operator - -These curl operators return scalar values: - -@table @code -@item curl(Vx,Vy) -2nd order curl operator using the central12 operator. Factor: @math{2h} - -@item curl4(Vx,Vy) -4th order curl operator using the central14 operator. Factor: @math{12h} - -@item curl2D(V) -2nd order curl operator on a 2D vector field (e.g.@: -@code{Array<TinyVector<float,2>,2>}), using the central12 operator. Factor: -@math{2h} - -@item curl2D4(V) -4th order curl operator on a 2D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -Available in normalized forms @code{curln}, @code{curl4n}, @code{curl2Dn}, -@code{curl2D4n}. - -These curl operators return three-dimensional @code{TinyVector}s of the -appropriate numeric type: - -@table @code -@item curl(Vx,Vy,Vz) -2nd order curl operator using the central12 operator. Factor: @math{2h} - -@item curl4(Vx,Vy,Vz) -4th order curl operator using the central14 operator. Factor: @math{12h} - -@item curl(V) -2nd order curl operator on a 3D vector field (e.g.@: -@code{Array<TinyVector<float,3>,3>}), using the central12 operator. 
Factor: -@math{2h} - -@item curl4(V) -4th order curl operator on a 3D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -Note that the above are available in normalized versions @code{curln} and -@code{curl4n}, which have factors of @math{h}. - -@subsection Divergence (@math{@nabla @cdot}) operators -@cindex divergence operator - -The divergence operators return a scalar value. - -@table @code -@item div(Vx,Vy) -2nd order div operator using the central12 operator. Factor: @math{2h} - -@item div4(Vx,Vy) -4th order div operator using the central14 operator. Factor: @math{12h} - -@item div(Vx,Vy,Vz) -2nd order div operator using the central12 operator. Factor: @math{2h} - -@item div4(Vx,Vy,Vz) -4th order div operator using the central14 operator. Factor: @math{12h} - -@item div2D(V) -2nd order div operator on a 2D vector field, using the central12 operator. -Factor: @math{2h} - -@item div2D4(V) -4th order div operator on a 2D vector field, using the central14 operator. -Factor: @math{12h} - -@item div3D(V) -2nd order div operator on a 3D vector field, using the central12 operator. -Factor: @math{2h} - -@item div3D4(V) -4th order div operator on a 3D vector field, using the central14 operator. -Factor: @math{12h} -@end table - -These are available in normalized versions -@code{divn}, @code{div4n}, @code{div2Dn}, @code{div2D4n}, @code{div3Dn}, and -@code{div3D4n} which have factors of @math{h}. - -@subsection Mixed partial derivatives -@cindex mixed partial operators - -@table @code -@item mixed22(A,dim1,dim2) -2nd order accurate, 2nd mixed partial derivative. Factor: @math{4h^2} - -@item mixed24(A,dim1,dim2) -4th order accurate, 2nd mixed partial derivative. Factor: @math{144h^2} -@end table - -There are also normalized versions of the above, @code{mixed22n} and -@code{mixed24n} which have factors @math{h^2}. 
- -@node Stencil customize, Stencil apply, Stencil operator, Stencils -@section Declaring your own stencil operators -@cindex stencil operators declaring your own - -You can declare your own stencil operators using the macro -@code{BZ_DECLARE_STENCIL_OPERATOR1}. For example, here is the declaration -of @code{Laplacian2D}: - -@example -BZ_DECLARE_STENCIL_OPERATOR1(Laplacian2D, A) - return -4*A(0,0) + A(-1,0) + A(1,0) + A(0,-1) + A(0,1); -BZ_END_STENCIL_OPERATOR -@end example - -To declare a stencil operator on 3 operands, use the macro -@code{BZ_DECLARE_STENCIL_OPERATOR3}. Here is the declaration of @code{div}: - -@example -BZ_DECLARE_STENCIL_OPERATOR3(div,vx,vy,vz) - return central12(vx,firstDim) + central12(vy,secondDim) - + central12(vz,thirdDim); -BZ_END_STENCIL_OPERATOR -@end example - -The macros aren't magical; they just declare an inline template function -with the names and arguments you specify. For example, the declaration of -@code{div} could also be written - -@example -template -inline typename T::T_numtype div(T& vx, T& vy, T& vz) -@{ - return central12(vx,firstDim) + central12(vy,secondDim) - + central12(vz,thirdDim); -@} -@end example - -The template parameter @code{T} is an iterator type for arrays. - -You are encouraged to use the macros when possible, because it is possible -the implementation could be changed in the future. - -To declare a difference operator, use this syntax: - -@example -BZ_DECLARE_DIFF(central12,A) @{ - return A.shift(1,dim) - A.shift(-1,dim); -@} -@end example - -The method @code{shift(offset,dim)} retrieves the element at -@code{offset} in dimension @code{dim}. - -Stencil operator declarations cannot occur inside a function. If -declared inside a class, they are scoped by the class. 
- -@node Stencil apply, , Stencil customize, Stencils -@section Applying a stencil object -@cindex stencil objects applying - -The syntax for applying a stencil is: - -@example -applyStencil(stencilname(),A,B,C...,F); -@end example - -Where @code{stencilname} is the name of the stencil, and @code{A,B,C,...,F} -are the arrays on which the stencil operates. - -For examples, see @file{examples/stencil.cpp} and @file{examples/stencil2.cpp}. - -Blitz++ interrogates the stencil object to find out how large its footprint -is. It only applies the stencil over the region of the arrays where it -won't overrun the boundaries. - diff --git a/doc/arrays-storage.texi b/doc/arrays-storage.texi deleted file mode 100644 index 135ba7b8..00000000 --- a/doc/arrays-storage.texi +++ /dev/null @@ -1,345 +0,0 @@ - -@node Array storage, , Array I/O, Arrays -@section Array storage orders -@cindex Array storage formats -@cindex storage of arrays - -Blitz++ is very flexible about the way arrays are stored in memory. -Starting indices can be 0, 1, or arbitrary numbers; arrays can be stored in -row major, column major or an order based on any permutation of the -dimensions; each dimension can be stored in either ascending or descending -order. An N dimensional array can be stored in @math{N! 2^N} possible ways. - -Before getting into the messy details, a review of array storage formats is -useful. If you're already familiar with strides and bases, you might -want to skip on to the next section. - -@subsection Fortran and C-style arrays - -Suppose we want to store this two-dimensional array in memory: - -@example -[ 1 2 3 ] -[ 4 5 6 ] -[ 7 8 9 ] -@end example - -@unnumberedsubsubsec Row major vs. column major - -To lay the array out in memory, it's necessary to map the indices (i,j) into -a one-dimensional block. 
Here are two ways the array might appear in -memory: - -@example -[ 1 2 3 4 5 6 7 8 9 ] -[ 1 4 7 2 5 8 3 6 9 ] -@end example - -The first order corresponds to a C or C++ style array, and is called -@emph{row-major ordering}: the data is stored first by row, and then by -column. The second order corresponds to a Fortran style array, and is -called @emph{column-major ordering}: the data is stored first by column, and -then by row. - -The simplest way of mapping the indices (i,j) into one-dimensional memory is -to take a linear combination.@footnote{Taking a linear combination is -sufficient for dense, asymmetric arrays, such as are provided by the Blitz++ -@code{Array} class.} Here's the appropriate linear combination for row -major ordering: - -@example -memory offset = 3*i + 1*j -@end example - -And for column major ordering: - -@example -memory offset = 1*i + 3*j -@end example - -The coefficients of the (i,j) indices are called @emph{strides}. For a row -major storage of this array, the @emph{row stride} is 3 -- you have to skip -three memory locations to move down a row. The @emph{column stride} is 1 -- -you move one memory location to move to the next column. This is also known -as @emph{unit stride}. For column major ordering, the row and column -strides are 1 and 3, respectively. - -@unnumberedsubsubsec Bases - -To throw another complication into this scheme, C-style arrays have indices -which start at zero, and Fortran-style arrays have indices which start at -one. The first valid index value is called the @emph{base}. To account for -a non-zero base, it's necessary to include an offset term in addition to the -linear combination. Here's the mapping for a C-style array with i=0..2 and -j=0..2: - -@example -memory offset = 0 + 3*i + 1*j -@end example - -No offset is necessary since the indices start at zero for C-style arrays. 
-For a Fortran-style array with i=1..3 and j=1..3, the mapping would be: - -@example -memory offset = -4 + 1*i + 3*j -@end example - -By default, Blitz++ creates arrays in the C-style storage format (base zero, -row major ordering). To create a Fortran-style array, you can use this -syntax: - -@example -Array A(3, 3, FortranArray<2>()); -@end example - -The third parameter, @code{FortranArray<2>()}, tells the @code{Array} -constructor to use a storage format appropriate for two-dimensional Fortran -arrays (base one, column major ordering). - -A similar object, @code{ColumnMajorArray}, tells the @code{Array} constructor -to use column major ordering, with base zero: - -@example -Array B(3, 3, ColumnMajorArray<2>()); -@end example - -This creates a 3x3 array with indices i=0..2 and j=0..2. - -In addition to supporting the 0 and 1 conventions for C and Fortran-style -arrays, Blitz++ allows you to choose arbitrary bases, possibly different for -each dimension. For example, this declaration creates an array whose -indices have ranges i=5..8 and j=2..5: - -@example -Array A(Range(5,8), Range(2,5)); -@end example - -@subsection Creating custom storage orders - -@cindex storage order, creating your own -@cindex Array storage order, creating your own - -All @code{Array} constructors take an optional parameter of type -@code{GeneralArrayStorage}. This parameter encapsulates a complete -description of the storage format. If you want a storage format other than -C or Fortran-style, you have two choices: - -@itemize @bullet - -@item You can create an object of type -@code{GeneralArrayStorage}, customize the storage format, and use -the object as an argument for the @code{Array} constructor. - -@item You can create your own storage format object which inherits from -@code{GeneralArrayStorage}. This is useful if you will be using the -storage format many times. This approach (inheriting from -@code{GeneralArrayStorage}) was used to create the -@code{FortranArray} objects. 
If you want to take this approach, you -can use the declaration of @code{FortranArray} in -@code{} as a guide. - -@end itemize - -The next sections describe how to modify a -@code{GeneralArrayStorage} object to suit your needs. - -@unnumberedsubsubsec In higher dimensions - -In more than two dimensions, the choice of storage order becomes more -complicated. Suppose we had a 3x3x3 array. To map the indices (i,j,k) into -memory, we might choose one of these mappings: - -@example -memory offset = 9*i + 3*j + 1*k -memory offset = 1*i + 3*j + 9*k -@end example - -The first corresponds to a C-style array, and the second to a Fortran-style -array. But there are other choices; we can permute the strides (1,3,9) any -which way: - -@example -memory offset = 1*i + 9*j + 3*k -memory offset = 3*i + 1*j + 9*k -memory offset = 3*i + 9*j + 1*k -memory offset = 9*i + 1*j + 3*k -@end example - -For an N dimensional array, there are N! such permutations. Blitz++ allows -you to select any permutation of the dimensions as a storage order. First -you need to create an object of type @code{GeneralArrayStorage}: - -@example -GeneralArrayStorage<3> storage; -@end example - -@code{GeneralArrayStorage} contains a vector called @code{ordering} -which controls the order in which dimensions are stored in memory. The -@code{ordering} vector will contain a permutation of the numbers 0, 1, ..., -N_rank-1. Since some people are used to the first dimension being 1 rather -than 0, a set of symbols (firstDim, secondDim, ..., eleventhDim) are -provided which make the code more legible. - -The @code{ordering} vector lists the dimensions in increasing order of -stride. You can access this vector using the member function -@code{ordering()}. A C-style array, the default, would have: - -@example -storage.ordering() = thirdDim, secondDim, firstDim; -@end example - -meaning that the third index (k) is associated with the smallest stride, and -the first index (i) is associated with the largest stride. 
A Fortran-style -array would have: - -@example -storage.ordering() = firstDim, secondDim, thirdDim; -@end example - -@unnumberedsubsubsec Reversed dimensions - -To add yet another wrinkle, there are some applications where the rows or -columns need to be stored in reverse order.@footnote{For example, certain -bitmap formats store image rows from bottom to top rather than top to -bottom.} - -Blitz++ allows you to store each dimension in either ascending or descending -order. By default, arrays are always stored in ascending order. The -@code{GeneralArrayStorage} object contains a vector called -@code{ascendingFlag} which indicates whether each dimension is stored -ascending (@code{true}) or descending (@code{false}). To alter the contents -of this vector, use the @code{ascendingFlag()} method: - -@example -// Store the third dimension in descending order -storage.ascendingFlag() = true, true, false; - -// Store all the dimensions in descending order -storage.ascendingFlag() = false, false, false; -@end example - -@unnumberedsubsubsec Setting the base vector - -@code{GeneralArrayStorage} also has a @code{base} vector which -contains the base index value for each dimension. By default, the base -vector is set to zero. @code{FortranArray} sets the base vector to -one. - -To set your own set of bases, you have two choices: - -@itemize @bullet - -@item You can modify the @code{base} vector inside the -@code{GeneralArrayStorage} object. The method @code{base()} returns -a mutable reference to the @code{base} vector which you can use to set the -bases. - -@item You can provide a set of @code{Range} arguments to the -@code{Array} constructor. 
- -@end itemize - -Here are some examples of the first approach: - -@example -// Set all bases equal to 5 -storage.base() = 5; - -// Set the bases to [ 1 0 1 ] -storage.base() = 1, 0, 1; -@end example - -And of the second approach: - -@example -// Have bases of 5, but otherwise C-style storage -Array<int,3> A(Range(5,7), Range(5,7), Range(5,7)); - -// Have bases of [ 1 0 1 ] and use a custom storage -Array<int,3> B(Range(1,4), Range(0,3), Range(1,4), storage); -@end example - -@unnumberedsubsubsec Working simultaneously with different storage orders - -Once you have created an array object, you will probably never have to worry -about its storage order. Blitz++ should handle arrays of different storage -orders transparently. It's possible to mix arrays of different storage -orders in one expression, and still get the correct result. - -Note however, that mixing different storage orders in an expression may -incur a performance penalty, since Blitz++ will have to pay more attention -to differences in indexing than it normally would. - -You may not mix arrays with different domains in the same expression. For -example, adding a base zero to a base one array is a no-no. The reason for -this restriction is that certain expressions become ambiguous, for example: - -@example -Array<int,1> A(Range(0,5)), B(Range(1,6)); -A=0; -B=0; -using namespace blitz::tensor; -int result = sum(A+B+i); -@end example - -Should the index @code{i} take its domain from array @code{A} or array -@code{B}? To avoid such ambiguities, users are forbidden from mixing arrays -with different domains in an expression. - -@unnumberedsubsubsec Debug dumps of storage order information - -In debug mode (@code{-DBZ_DEBUG}), class @code{Array} provides a member -function @code{dumpStructureInformation()} which displays information about -the array storage: - -@example -Array<int,4> A(3,7,8,2,FortranArray<4>()); -A.dumpStructureInformation(cerr); -@end example - -The optional argument is an @code{ostream} to dump information to.
It -defaults to @code{cout}. Here's the output: - -@smallexample -@include examples/dump.out -@end smallexample - -@unnumberedsubsubsec A note about storage orders and initialization - -When initializing arrays with comma delimited lists, note that the array is -filled in storage order: from the first memory location to the last memory -location. This won't cause any problems if you stick with C-style arrays, -but it can be confusing for Fortran-style arrays: - -@example -Array A(3, 3, FortranArray<2>()); -A = 1, 2, 3, - 4, 5, 6, - 7, 8, 9; -cout << A << endl; -@end example - -The output from this code excerpt will be: - -@example -A = 3 x 3 - 1 4 7 - 2 5 8 - 3 6 9 -@end example - -This is because Fortran-style arrays are stored in column -major order. - -@subsection Storage orders example - -@smallexample -@include examples/storage.texi -@end smallexample - -And the output: - -@smallexample -@include examples/storage.out -@end smallexample - diff --git a/doc/arrays-types.texi b/doc/arrays-types.texi deleted file mode 100644 index d80f5d26..00000000 --- a/doc/arrays-types.texi +++ /dev/null @@ -1,22 +0,0 @@ - -@node Array types, Array ctors, Array intro, Arrays -@section Public types - -The @code{Array} class declares these public types: - -@itemize @bullet - -@item @code{T_numtype} is the element type stored in the array. For -example, the type @code{Array::T_numtype} would be @code{double}. - -@item @code{T_index} is a vector index into the array. The class -@code{TinyVector} is used for this purpose. - -@item @code{T_array} is the array type itself -(@code{Array}) - -@item @code{T_iterator} is an iterator type. NB: this iterator is not -yet fully implemented, and is NOT STL compatible at the present time. 
- -@end itemize - diff --git a/doc/arrays-usertype.texi b/doc/arrays-usertype.texi deleted file mode 100644 index afb372c8..00000000 --- a/doc/arrays-usertype.texi +++ /dev/null @@ -1,42 +0,0 @@ - -@node Array usertype, , Array multi, Customized Arrays -@section Creating arrays of a user type -@cindex Array of your own types - -You can use the @code{Array} class with types you have created yourself, or -types from another library. If you want to do arithmetic on the array, -whatever operators you use on the arrays have to be defined on the -underlying type. - -For example, here's a simple class for doing fixed point computations in the -interval [0,1]: - -@smallexample -@include examples/fixed-point.texi -@end smallexample - -The function @code{huge(T)} returns the largest representable value for type -T; in the example above, it's equal to @code{UINT_MAX}. - -The @code{FixedPoint} class declares three useful operations: conversion -from @code{double}, addition, and outputting to an @code{ostream}. We can -use all of these operations on an @code{Array} object: - -@smallexample -@include examples/fixed.texi -@end smallexample - -Note that the array @code{A} is initialized using a comma-delimited list of -@code{double}; this makes use of the constructor @code{FixedPoint(double)}. -The assignment @code{B = A + 0.05} uses -@code{FixedPoint::operator+(FixedPoint)}, with an implicit conversion from -@code{double} to @code{FixedPoint}. Formatting the array @code{B} onto the -standard output stream is done using the output operator defined for -@code{FixedPoint}.
- -Here's the program output: - -@smallexample -@include examples/fixed.out -@end smallexample - diff --git a/doc/blitz.gif b/doc/blitz.gif deleted file mode 100644 index d59a78fd..00000000 Binary files a/doc/blitz.gif and /dev/null differ diff --git a/doc/blitz.texi b/doc/blitz.texi deleted file mode 100644 index d4e6fa8d..00000000 --- a/doc/blitz.texi +++ /dev/null @@ -1,295 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header -@setfilename blitz.info -@include version.texi -@settitle Blitz++ -@setchapternewpage odd -@finalout -@iftex -@afourpaper -@end iftex -@c %**end of header - -@dircategory Blitz++ library -@direntry -* Blitz++: (blitz++). High-performance C++ numeric library -@end direntry - - -@macro faq{question} -@strong{@bullet{} \question\} -@end macro - -@c kludge workaround from Karl Berry for math in @subsection -@ifnottex -@macro nabla -\\nabla -@end macro -@macro times -\\times -@end macro -@macro cdot -\\cdot -@end macro -@end ifnottex - -@titlepage -@title Blitz++ User's Guide -@subtitle A C++ class library for scientific computing -@subtitle for version @value{VERSION}, @value{UPDATED} -@author Todd Veldhuizen -@page -@vskip 0pt plus 1filll -@include copyright.texi -@end titlepage - -@summarycontents -@contents -@page - -@ifnottex -@node Top, , , (DIR) -@top Top -@end ifnottex - -@menu -* Introduction:: Introduction -* Arrays:: Arrays -* Array Expressions:: Array Expressions -* Stencils:: Stencils -* Customized Arrays:: Multicomponent, complex, and user type Arrays -* Indirection:: Indirection -* TinyVector:: TinyVector -* Parallel Computing:: Parallel Computing with Blitz++ -* Random Number Generators:: Random Number Generators -* Numeric properties:: Numeric properties -* FAQ:: Frequently Asked Questions -* Keyword Index:: Blitz Keyword Index -* Concept Index:: Concept Index - -@detailmenu - --- The Detailed Node Listing --- - -General considerations - -* about:: About this document -* platforms:: Platform/compiler notes -* 
download:: How to download Blitz++ -* install:: Installation and porting -* compiling:: Compiling with Blitz++ -* legal:: Licensing terms -* help:: Mailing lists and support - -The Blitz++ Array class - -* Array intro:: Getting started -* Array types:: Public types -* Array ctors:: Constructors -* Array slicing:: Indexing, subarrays, and slicing -* Array debug:: Debug mode -* Array members:: Member functions -* Array globals:: Global functions -* Array I/O:: Inputting and Outputting Arrays -* Array storage:: Array storage orders - -Writing expressions with the Array class - -* Expression evaluation:: Expression evaluation order -* Index placeholders:: Index placeholders -* Math functions 1:: Single-argument math functions -* Math functions 2:: Two-argument math functions -* User et:: Declaring your own math functions on arrays -* Where expr:: where statements - -Array Stencils - -* Stencil object:: Declaring stencil objects -* Stencil operator:: Stencil operators -* Stencil customize:: Declaring your own stencil operators -* Stencil apply:: Applying a stencil object - -Customization of the Array class - -* Array multi:: Multicomponent and complex arrays -* Array usertype:: Creating arrays of a user type - -Array indirect addressing - -* Indirection position list:: Indirection using lists of array positions -* Indirection Cartesian product:: Cartesian-product indirection -* Indirection strip list:: Indirection with lists of strips - -The Blitz++ TinyVector class - -* TinyVec params:: Template parameters and types -* TinyVec ctors:: Constructors -* TinyVec members:: Member functions -* TinyVec assignment:: Assignment operators -* TinyVec exprs:: Expressions -* TinyVec globals:: Global functions -* TinyVec arrays:: Arrays of TinyVector -* TinyVec io:: Input/output - -Parallel Computing with Blitz++ - -* Thread safety:: Blitz++ and thread safety - -Random Number Generators in Blitz++ - -* RNG overview:: Overview -* RNG seeding:: Seeding a random number generator -* RNG 
details:: Detailed description of RNGs -* RNG params:: Template parameters -* RNG members:: Member functions -* RNG listings:: Detailed listing of RNGs - -Numeric properties functions in Blitz++ - -* Numeric limits:: Introduction -* Numeric functions:: Function descriptions - -@end detailmenu -@end menu - -@node Introduction, Arrays, , Top -@chapter Introduction -@menu -* about:: About this document -* platforms:: Platform/compiler notes -* download:: How to download Blitz++ -* install:: Installation and porting -* compiling:: Compiling with Blitz++ -* legal:: Licensing terms -* help:: Mailing lists and support -@end menu -@include about.texi -@include platforms.texi -@include download.texi -@include install.texi -@include compiling.texi -@include legal.texi -@include help.texi - -@node Arrays, Array Expressions, Introduction, Top -@chapter Arrays -@cindex Array -@findex Array -@menu -* Array intro:: Getting started -* Array types:: Public types -* Array ctors:: Constructors -* Array slicing:: Indexing, subarrays, and slicing -* Array debug:: Debug mode -* Array members:: Member functions -* Array globals:: Global functions -* Array I/O:: Inputting and Outputting Arrays -* Array storage:: Array storage orders -@end menu -@include arrays-intro.texi -@include arrays-types.texi -@include arrays-ctors.texi -@include arrays-slicing.texi -@include arrays-debug.texi -@include arrays-members.texi -@include arrays-globals.texi -@include arrays-io.texi -@include arrays-storage.texi - -@node Array Expressions, Stencils, Arrays, Top -@chapter Array Expressions -@menu -* Expression evaluation:: Expression evaluation order -* Index placeholders:: Index placeholders -* Math functions 1:: Single-argument math functions -* Math functions 2:: Two-argument math functions -* User et:: Declaring your own math functions on arrays -* Where expr:: where statements -@end menu -@include arrays-expr.texi - -@node Stencils, Customized Arrays, Array Expressions, Top -@chapter Stencils -@menu 
-* Stencil object:: Declaring stencil objects -* Stencil operator:: Stencil operators -* Stencil customize:: Declaring your own stencil operators -* Stencil apply:: Applying a stencil object -@end menu -@include arrays-stencils.texi - -@node Customized Arrays, Indirection, Stencils, Top -@chapter Multicomponent, complex, and user type Arrays -@menu -* Array multi:: Multicomponent and complex arrays -* Array usertype:: Creating arrays of a user type -@end menu -@include arrays-multi.texi -@include arrays-usertype.texi - -@node Indirection, TinyVector, Customized Arrays, Top -@chapter Indirection -@menu -* Indirection position list:: Indirection using lists of array positions -* Indirection Cartesian product:: Cartesian-product indirection -* Indirection strip list:: Indirection with lists of strips -@end menu -@include arrays-indirect.texi - -@node TinyVector, Parallel Computing, Indirection, Top -@chapter TinyVector -@menu -* TinyVec params:: Template parameters and types -* TinyVec ctors:: Constructors -* TinyVec members:: Member functions -* TinyVec assignment:: Assignment operators -* TinyVec exprs:: Expressions -* TinyVec globals:: Global functions -* TinyVec arrays:: Arrays of TinyVector -* TinyVec io:: Input/output -@end menu -@include tinyvector.texi - -@node Parallel Computing, Random Number Generators, TinyVector, Top -@chapter Parallel Computing with Blitz++ -@menu -* Thread safety:: Blitz++ and thread safety -@end menu -@include parallel.texi - -@node Random Number Generators, Numeric properties, Parallel Computing, Top -@chapter Random Number Generators -@menu -* RNG overview:: Overview -* RNG seeding:: Seeding a random number generator -* RNG details:: Detailed description of RNGs -* RNG params:: Template parameters -* RNG members:: Member functions -* RNG listings:: Detailed listing of RNGs -@end menu -@include random.texi - -@node Numeric properties, FAQ, Random Number Generators, Top -@chapter Numeric properties -@menu -* Numeric limits:: 
Introduction -* Numeric functions:: Function descriptions -@end menu -@include numinquire.texi - -@node FAQ, Keyword Index, Numeric properties, Top -@chapter Frequently Asked Questions -@include faq.texi - -@node Keyword Index, Concept Index, FAQ, Top -@unnumbered Blitz Keyword Index -@printindex fn - -@node Concept Index, , Keyword Index, Top -@unnumbered Concept Index -@printindex cp - -@c --------------------------------------------------------------------- -@c Epilogue -@c --------------------------------------------------------------------- - -@bye diff --git a/doc/blitztiny.jpg b/doc/blitztiny.jpg deleted file mode 100644 index dd726835..00000000 Binary files a/doc/blitztiny.jpg and /dev/null differ diff --git a/doc/compiling.texi b/doc/compiling.texi deleted file mode 100644 index 3a09e310..00000000 --- a/doc/compiling.texi +++ /dev/null @@ -1,75 +0,0 @@ - -@node compiling, legal, install, Introduction -@section Compiling with Blitz++ - -@subsection Header files -@cindex header files, convention -@cindex @file{blitz} header files - -Blitz++ follows an X-windows style convention for header files. All headers -are referred to with a prefix of @file{blitz}. For example, to use the -@code{Array} class, one needs to include @code{<blitz/array.h>} instead -of just @code{<array.h>}. To make this work, the main Blitz++ directory -must be in your include path. For example, if Blitz++ was installed in -@file{/software/Blitz++}, you will need to compile with @code{-I -/software/Blitz++}. - -If you have root privileges, you may want to put in a symbolic link from the -standard include path (e.g. @file{/usr/include/blitz/}) to the @code{blitz} -directory of the distribution. This will allow you to omit the @code{-I -...} option when compiling. - -@subsection Linking to the Blitz++ library -@findex libblitz.a -@cindex library (@file{libblitz.a}) - -The Blitz++ library file @file{libblitz.a} contains a few pieces of global -data.
You should ensure that the @file{lib} subdirectory of the Blitz++ -distribution is in your library path (e.g. -@code{-L/usr/local/blitz-0.5/lib}) and include @code{-lblitz} on your -command line. If you use math functions, you should also compile with -@code{-lm}. - -@subsection An example Makefile -@cindex makefile, example - -Here is a typical skeletal Makefile for compiling with Blitz++ under gcc: - -@smallexample -@include examples/makefile.example -@end smallexample - -There are more example makefiles in the examples, testsuite, and benchmarks -directories of the distribution. - -@subsection Explicit instantiation -@cindex explicit instantiation -@cindex Array explicit instantiation - -It is not possible to do explicit instantiation of Blitz++ arrays. If you -aren't familiar with explicit instantiation of templates, then this fact -will never bother you. - -The reason is that explicit instantiation results in all members of a class -template being instantiated. This is @strong{not} the case for implicit -instantiation, in which only required members are instantiated. The -@code{Array} class contains members which are not valid for all types -@code{T}: for example, the binary AND operation @code{&=} is nonsensical if -@code{T=float}. If you attempt to explicitly instantiate an array class, -e.g. - -@code{template class Array;} - -then you will be rewarded with many compile errors, due to methods such as -@code{&=} which are nonsensical for @code{float}. - -As some consolation, explicit instantiation would not be much help with -Blitz++ arrays. The typical use for explicit instantiation is to -instantiate all the templates you need in one compilation unit, and turn off -implicit instantiation in the others -- to avoid duplicate instantiations -and reduce compile times. This is only possible if you can predict ahead of -time what needs instantiation. Easy for simple templates, but impossible -for classes like @code{Array}. 
Almost every line of code you write using -@code{Array} will cause a different set of things to be implicitly -instantiated. - diff --git a/doc/constants.texi b/doc/constants.texi deleted file mode 100644 index e69de29b..00000000 diff --git a/doc/copyright.texi b/doc/copyright.texi deleted file mode 100644 index a25daf48..00000000 --- a/doc/copyright.texi +++ /dev/null @@ -1,14 +0,0 @@ - - -The Blitz++ library is licensed under both the GPL and the more permissive -``Blitz++ Artistic License''. Take your pick. They are detailed in GPL -and LICENSE, respectively. The artistic license is more appropriate for -commercial use, since it lacks the ``viral'' properties of the GPL. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Copyright @copyright{} 1996--2003 Free Software Foundation, Inc. - diff --git a/doc/download.texi b/doc/download.texi deleted file mode 100644 index 0d7c73ce..00000000 --- a/doc/download.texi +++ /dev/null @@ -1,12 +0,0 @@ - -@node download, install, platforms, Introduction -@section How to download Blitz++ - -The Blitz++ project is now being served via SourceForge. -To download the Blitz++ library, go to the blitz project web page, at -@uref{http://sourceforge.net/projects/blitz}. - -More information about supported platforms and C++ compilers is available -in this document or on the official Blitz++ home page, at -@uref{http://oonumerics.org/blitz}. 
- diff --git a/doc/doxygen/CMakeLists.txt b/doc/doxygen/CMakeLists.txt deleted file mode 100644 index b08fd7f9..00000000 --- a/doc/doxygen/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -# Configure the script and the Doxyfile, then add target - -find_package(Doxygen) - -if (DOXYGEN_FOUND) - if (NOT DOXYGEN_DOT_PATH) - get_filename_component(DOT_PATH ${DOT} PATH) - endif() - - option(DISABLE_REFMAN_PDF "Disable generation of refman.pdf" OFF) - - set(top_srcdir ${CMAKE_SOURCE_DIR}) - set(top_builddir ${CMAKE_BINARY_DIR}) - set(enable_latex_docs YES) - set(enable_html_docs YES) - set(enable_dot YES) - set(PACKAGE_NAME ${CMAKE_PROJECT_NAME}) - set(PACKAGE_VERSION ${blitz_VERSION}) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) - - add_custom_target(doxygen ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) - install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html/ DESTINATION ${CMAKE_INSTALL_DOCDIR}/reference) - add_dependencies(blitz-doc doxygen) - - if (NOT DISABLE_REFMAN_PDF) - add_custom_command(OUTPUT latex/refman.pdf - COMMAND make - WORKING_DIRECTORY latex - DEPENDS doxygen) - - add_custom_target(doxygen-latex DEPENDS latex/refman.pdf) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf DESTINATION ${CMAKE_INSTALL_DOCDIR}) - add_dependencies(blitz-doc doxygen-latex) - endif() - - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "html;latex;doxygen-warning") -endif() diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in deleted file mode 100644 index 8584609b..00000000 --- a/doc/doxygen/Doxyfile.in +++ /dev/null @@ -1,2513 +0,0 @@ -# Doxyfile 1.8.15 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. 
-# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = @PACKAGE_NAME@ - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = "Version @PACKAGE_VERSION@" - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. 
Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. 
-# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = YES - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. 
- -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = @top_srcdir@/ \ - @top_builddir@/ - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. 
- -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = YES - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. 
- -TAB_SIZE = 2 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = "docme=\todo\nDoc me!" - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. 
-# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. 
-# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 0. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 0 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. 
Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = YES - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). 
-# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. 
-# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO, only methods in the interface are -# included. -# The default value is: NO. 
- -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. 
- -INTERNAL_DOCS = YES - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. 
- -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. 
- -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if <section_label> ... \endif and \cond <section_label> -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely.
The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. 
You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. 
- -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. -# The default value is: NO. - -WARN_AS_ERROR = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). 
- -WARN_LOGFILE = doxygen-warning - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = @top_srcdir@/blitz/ \ - @top_srcdir@/random/ - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, -# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. - -FILE_PATTERNS = *.h - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. 
- -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = @top_srcdir@/blitz/generate \ - @top_srcdir@/blitz/array-old.h \ - @top_srcdir@/blitz/bench.h \ - @top_srcdir@/blitz/benchext.h \ - @top_srcdir@/blitz/bzdebug.h \ - @top_srcdir@/blitz/tau.h \ - @top_srcdir@/blitz/timer.h \ - @top_srcdir@/blitz/limits-hack.h \ - @top_srcdir@/blitz/promote.h \ - @top_srcdir@/blitz/promote-old.h \ - @top_srcdir@/blitz/array \ - @top_srcdir@/blitz/meta \ - @top_srcdir@/blitz/applics.h \ - @top_srcdir@/blitz/funcs.h \ - @top_srcdir@/blitz/mathfunc.h \ - @top_srcdir@/blitz/mathf2.h \ - @top_srcdir@/blitz/numinquire.h \ - @top_srcdir@/blitz/update.h \ - @top_srcdir@/blitz/tiny.h \ - @top_srcdir@/blitz/tinymat.h \ - @top_srcdir@/blitz/tinymatexpr.h \ - @top_srcdir@/blitz/matbops.h \ - @top_srcdir@/blitz/matexpr.h \ - @top_srcdir@/blitz/matuops.h \ - @top_srcdir@/blitz/matref.h \ - @top_srcdir@/blitz/randref.h \ - @top_srcdir@/blitz/vecexpr.h \ - @top_srcdir@/blitz/vecexprwrap.h \ - @top_srcdir@/blitz/vecwhere.h - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. 
-# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = CVS \ - *.in - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = @top_srcdir@/examples - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = *.cpp - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# <filter> <input-file> -# -# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the -# name of an input file.
Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. 
- -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# entity all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = YES - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = YES - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. 
Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see https://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES.
- -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 2 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = @enable_html_docs@ - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML, the header file must include any scripts and style sheets -# that doxygen needs, which depend on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses.
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# https://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. 
-# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - -# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML -# documentation will contain a main index with vertical navigation menus that -# are dynamically created via Javascript. If disabled, the navigation index will -# consist of multiple levels of tabs that are statically embedded in every HTML -# page. Disable this option to support browsers that do not have Javascript, -# like the Qt help browser. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES.
- -HTML_DYNAMIC_MENUS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries to 1 will produce a fully collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a fully expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy -# genXcode/_index.html for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed.
A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. 
Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
- -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). 
-# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. 
A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = YES - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. 
-# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# https://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX -# installed or if you want formulas to look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES.
- -USE_MATHJAX = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/ - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. 
- -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use <access key> + S -# (what the <access key> is depends on the OS and browser, but it is typically -# <CTRL>, <ALT>/<option>