diff --git a/.gitignore b/.gitignore index 78cbee510..cbbe65e8d 100644 --- a/.gitignore +++ b/.gitignore @@ -41,7 +41,6 @@ lex.*.cc ## Top-level generated artifacts Default -docs html latex tmp diff --git a/conf/libdap.m4 b/conf/libdap.m4 index 54d1aede9..c2ababcb9 100644 --- a/conf/libdap.m4 +++ b/conf/libdap.m4 @@ -312,3 +312,113 @@ dnl AC_SUBST([DAP_CLIENT_STATIC_LIBS]) dnl AC_SUBST([DAP_SERVER_STATIC_LIBS]) dnl AC_SUBST([DAP_ROOT]) ]) + +# LIBDAP_CHECK_CURL() +# Resolve libcurl flags, preferring pkg-config and falling back to curl-config. +AC_DEFUN([LIBDAP_CHECK_CURL], +[ + AC_ARG_WITH([curl], + [AS_HELP_STRING([--with-curl=PFX],[curl/libcurl prefix; overrides pkg-config and curl-config fallback])], + [with_curl_prefix="$withval"], + [with_curl_prefix=""]) + + curlprivatereq= + curlprivatelibs= + libdap_libcurl_module='libcurl >= 7.19.0' + + AC_MSG_CHECKING([for libcurl]) + + AS_IF([test -n "$with_curl_prefix"], + [ + CURL_CONFIG="$with_curl_prefix/bin/curl-config" + AS_IF([test ! -x "$CURL_CONFIG"], + [AC_MSG_ERROR([You set the curl prefix directory to $with_curl_prefix, but curl-config is not there.])]) + + CURL_LIBS=`$CURL_CONFIG --libs` + CURL_CFLAGS=`$CURL_CONFIG --cflags` + curlprivatelibs=$CURL_LIBS + AC_MSG_RESULT([yes; used $CURL_CONFIG]) + ], + [ + PKG_CHECK_MODULES([CURL],[$libdap_libcurl_module], + [ + curlprivatereq=$libdap_libcurl_module + curlprivatelibs=`$PKG_CONFIG --static --libs libcurl` + AC_MSG_RESULT([yes; used pkg-config]) + ], + [ + AC_PATH_PROG([CURL_CONFIG], [curl-config], [no]) + AS_IF([test "x$CURL_CONFIG" = xno], + [AC_MSG_ERROR([I could not find libcurl])]) + + version_libcurl=`$CURL_CONFIG --version | sed 's@libcurl \(.*\)@\1@'` + AS_VERSION_COMPARE(["$version_libcurl"], ["7.19.0"], + [AC_MSG_ERROR([I could not find libcurl 7.19.0 or newer, found $version_libcurl])]) + + CURL_LIBS=`$CURL_CONFIG --libs` + CURL_CFLAGS=`$CURL_CONFIG --cflags` + curlprivatelibs=$CURL_LIBS + AC_MSG_RESULT([yes; used curl-config and found version 
$version_libcurl]) + ]) + ]) + + AC_SUBST([curlprivatereq]) + AC_SUBST([curlprivatelibs]) + AC_SUBST([CURL_LIBS]) + AC_SUBST([CURL_CFLAGS]) +]) + +# LIBDAP_CHECK_XML2() +# Resolve libxml2 flags, preferring pkg-config and falling back to xml2-config. +AC_DEFUN([LIBDAP_CHECK_XML2], +[ + AC_ARG_WITH([xml2], + [AS_HELP_STRING([--with-xml2=PFX],[libxml2 prefix; overrides pkg-config and xml2-config fallback])], + [with_xml2_prefix="$withval"], + [with_xml2_prefix=""]) + + xmlprivatereq= + xmlprivatelibs= + libdap_libxml2_module='libxml-2.0 >= 2.7.0' + + AC_MSG_CHECKING([for libxml2]) + + AS_IF([test -n "$with_xml2_prefix"], + [ + XML2_CONFIG="$with_xml2_prefix/bin/xml2-config" + AS_IF([test ! -x "$XML2_CONFIG"], + [AC_MSG_ERROR([You set the libxml2 prefix directory to $with_xml2_prefix, but xml2-config is not there.])]) + + XML2_LIBS=`$XML2_CONFIG --libs` + XML2_CFLAGS=`$XML2_CONFIG --cflags` + xmlprivatelibs=$XML2_LIBS + AC_MSG_RESULT([yes; used $XML2_CONFIG]) + ], + [ + PKG_CHECK_MODULES([XML2],[$libdap_libxml2_module], + [ + xmlprivatereq=$libdap_libxml2_module + xmlprivatelibs=`$PKG_CONFIG --libs libxml-2.0` + AC_MSG_RESULT([yes; used pkg-config]) + ], + [ + AC_PATH_PROG([XML2_CONFIG], [xml2-config], [no]) + AS_IF([test "x$XML2_CONFIG" = xno], + [AC_MSG_ERROR([I could not find libxml2])]) + + version_libxml2=`$XML2_CONFIG --version` + AS_VERSION_COMPARE(["$version_libxml2"], ["2.7.0"], + [AC_MSG_ERROR([I could not find libxml2 2.7.0 or newer])]) + + XML2_LIBS=`$XML2_CONFIG --libs` + XML2_CFLAGS=`$XML2_CONFIG --cflags` + xmlprivatelibs=$XML2_LIBS + AC_MSG_RESULT([yes; used xml2-config and found version $version_libxml2]) + ]) + ]) + + AC_SUBST([xmlprivatereq]) + AC_SUBST([xmlprivatelibs]) + AC_SUBST([XML2_LIBS]) + AC_SUBST([XML2_CFLAGS]) +]) diff --git a/configure.ac b/configure.ac index 4ab001e68..4b4d113fa 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,3 @@ - dnl -*- autoconf -*- dnl Process this file with autoconf to produce a configure script. 
@@ -57,15 +56,6 @@ AC_DEFINE_UNQUOTED(CVER, "$PACKAGE_VERSION", [Client version number]) AC_DEFINE_UNQUOTED(DVR, "libdap/$PACKAGE_VERSION", [Client name and version combined]) AC_SUBST(DVR) -dnl This block of code can be removed since the DDS now only builds the v3.2 DDX. -dnl jhrg 2/4/22 -dnl -dnl Use one of these two blocks to build a DAP2 or DAP 3.2 DDX -dnl AC_DEFINE(DAP2_DDX, 1, [Build the DAP 2 version of the DDX]) -dnl AC_SUBST(DAP2_DDX) -dnl AC_DEFINE(DAP3_2_DDX, 1, [Build the DAP 3.2 version of the DDX]) -dnl AC_SUBST(DAP3_2_DDX) - AS_IF([echo $PACKAGE_VERSION | grep -q '^\([[0-9]]\)*\.\([[0-9]]*\)\.\([[0-9]]*\)-\([[0-9]]*\)$'], [PACKAGE_MAJOR_VERSION=`echo $PACKAGE_VERSION | sed 's@^\([[0-9]]\)*\.\([[0-9]]*\)\.\([[0-9]]*\)-\([[0-9]]*\)$@\1@'` PACKAGE_MINOR_VERSION=`echo $PACKAGE_VERSION | sed 's@^\([[0-9]]\)*\.\([[0-9]]*\)\.\([[0-9]]*\)-\([[0-9]]*\)$@\2@'` @@ -197,17 +187,7 @@ AS_VERSION_COMPARE(["$bison_version"], ["3.0"], AC_MSG_RESULT([found version $bison_version]) dnl Checks for header files. -AC_HEADER_DIRENT -# Use this once we can update to autoconf 2.70. jhrg 2/13/25 AC_CHECK_INCLUDES_DEFAULT -AC_PROG_EGREP -AC_CHECK_HEADERS([getopt.h limits.h locale.h]) - - -AC_HEADER_SYS_WAIT - -AC_CHECK_HEADERS_ONCE([fcntl.h malloc.h memory.h stddef.h stdlib.h string.h strings.h unistd.h pthread.h]) -AC_CHECK_HEADERS_ONCE([sys/param.h sys/time.h]) -AC_CHECK_HEADERS_ONCE([netinet/in.h]) +AC_CHECK_HEADERS_ONCE([stdlib.h string.h unistd.h pthread.h]) dnl Do this because we have had a number of problems with the UUID header/library AC_CHECK_HEADERS([uuid/uuid.h],[found_uuid_uuid_h=true],[found_uuid_uuid_h=false]) @@ -215,13 +195,6 @@ AC_CHECK_HEADERS([uuid.h],[found_uuid_h=true],[found_uuid_h=false]) AS_IF([test $found_uuid_uuid_h = true -o $found_uuid_h = true], [], [AC_MSG_ERROR([Could not find uuid.h])]) -dnl Checks for typedefs, structures, and compiler characteristics. 
-AC_C_CONST -AC_C_INLINE -AC_TYPE_SIZE_T -AC_CHECK_MEMBERS([struct stat.st_blksize]) -AC_STRUCT_TM -AC_C_VOLATILE AC_C_BIGENDIAN # This is used by the DMR tests which must choose the correct set of baselines @@ -243,7 +216,7 @@ AS_IF([test "x$enable_runtime_endian_check" = "xyes"], [ # Checks for library functions. dnl using AC_CHECK_FUNCS does not run macros from gnulib. -AC_CHECK_FUNCS([alarm atexit bzero dup2 getcwd getpagesize localtime_r memmove memset pow putenv setenv strchr strerror strtol strtoul timegm mktime]) +AC_CHECK_FUNCS([atexit timegm mktime]) gl_SOURCE_BASE(gl) gl_M4_BASE(gl/m4) @@ -251,210 +224,8 @@ gl_MODULES(regex btyeswap) gl_INIT -AC_ARG_WITH([curl], [AS_HELP_STRING([--with-curl=pfx],[curl/libcurl prefix; overrides other tests including pkgconfig])], - with_curl_prefix="$withval", with_curl_prefix="") - -dnl I wrote these checks because we need the *-config scripts to build, so -dnl the AC_CHECK_LIB macro is not needed. - -curlprivatereq= -curlprivatelibs= -curl_set= - -if test -n "$with_curl_prefix" -a -x $with_curl_prefix/bin/curl-config -then - AC_MSG_NOTICE([Using $with_curl_prefix as the curl prefix directory.]) - CURL_LIBS="`$with_curl_prefix/bin/curl-config --libs`" - CURL_STATIC_LIBS=$CURL_LIBS - curlprivatelibs="`$with_curl_prefix/bin/curl-config --libs`" - CURL_CFLAGS="`$with_curl_prefix/bin/curl-config --cflags`" - curl_set="yes" -elif test -n "$with_curl_prefix" -then - AC_MSG_ERROR([You set the curl-prefix directory to $with_curl_prefix, but curl-config is not there.]) -fi - -if test -z "$curl_set" -then - # curlprivatereq= - # curlprivatelibs= - libdap_pkgconfig_libcurl=yes - libdap_libcurl_module='libcurl >= 7.19.0' - PKG_CHECK_MODULES([CURL],[$libdap_libcurl_module],, - [libdap_pkgconfig_libcurl=no]) - AC_MSG_CHECKING([for libcurl]) - - if test $libdap_pkgconfig_libcurl = 'yes' - then - curlprivatereq=$libdap_libcurl_module - CURL_STATIC_LIBS="`$PKG_CONFIG --static --libs libcurl`" - AC_MSG_RESULT([yes; used pkg-config]) - 
elif curl-config --version > /dev/null 2>&1 - then - version_libcurl=`curl-config --version | sed 's@libcurl \(.*\)@\1@'` - - AS_VERSION_COMPARE(["$version_libcurl"], ["7.19.0"], - [AC_MSG_ERROR([I could not find libcurl 7.19.0 or newer, found $version_libcurl])]) - - CURL_LIBS="`curl-config --libs`" - CURL_STATIC_LIBS=$CURL_LIBS - curlprivatelibs="`curl-config --libs`" - CURL_CFLAGS="`curl-config --cflags`" - AC_MSG_RESULT([yes; used curl-config and found version $version_libcurl]) - else - AC_MSG_ERROR([I could not find libcurl]) - fi -fi - -AC_SUBST([curlprivatereq]) -AC_SUBST([curlprivatelibs]) -AC_SUBST([CURL_LIBS]) -AC_SUBST([CURL_STATIC_LIBS]) -AC_SUBST([CURL_CFLAGS]) - -dnl ******** new version, breaks the bes package step ******** -dnl jhrg 8/31/20 -dnl -dnl AC_ARG_WITH(xml2, -dnl [AS_HELP_STRING([--with-xml2=PFX],[dnl Prefix where libxml2 is installed (optional). This will override pkgconfig, etc.])], -dnl [with_xml2_prefix="$withval"], -dnl [with_xml2_prefix=""]) -dnl -dnl AS_IF([test -n "$with_xml2_prefix" -a ! -x $with_xml2_prefix/bin/xml2-config], -dnl [AC_MSG_ERROR([You set the libxml2 prefix directory to $with_xml2_prefix, but xml2-config is not there.])]) -dnl -dnl xmlprivatereq= -dnl xmlprivatelibs= -dnl xml_set= -dnl -dnl # I changed this code so that it searches for libxml2 using xml2-config -dnl # first, then pkg-config. This works more reliably when working on OSX -dnl # given Apple's penchant for moving the lib. jhrg 8/25/20 -dnl -dnl AS_IF([test -n "$with_xml2_prefix" -a -x $with_xml2_prefix/bin/xml2-config], -dnl [ -dnl AC_MSG_NOTICE([Using $with_xml2_prefix as the libxml2 prefix directory.]) -dnl XML2_LIBS="`$with_xml2_prefix/bin/xml2-config --libs`" -dnl XML2_CFLAGS="`$with_xml2_prefix/bin/xml2-config --cflags`" -dnl xml_set="yes" -dnl ]) -dnl -dnl # Try using the xml2-config script. 
-dnl -dnl AS_IF([test -z "$xml_set" & xml2-config --version > /dev/null 2>&1], -dnl [ -dnl AC_MSG_CHECKING([for libxml2]) -dnl version_libxml2=`xml2-config --version` -dnl -dnl AS_VERSION_COMPARE(["$version_libxml2"], ["2.7.0"], -dnl [AC_MSG_ERROR([I could not find libxml2 2.7.0 or newer])]) -dnl -dnl XML2_LIBS="`xml2-config --libs`" -dnl XML2_CFLAGS="`xml2-config --cflags`" -dnl xmlprivatelibs="`xml2-config --libs `" -dnl -dnl # If XML2_CFLAGS does not have -I that ends in /libxml2, append that to -dnl # the string bound to -I. Assume there is only on -I in XML2_CFLAGS. jhrg 8/25/20 -dnl AS_IF([echo $XML2_CFLAGS | grep -v -e '-I.*/libxml2'], -dnl [XML2_CFLAGS=`echo $XML2_CFLAGS | sed "s@\(-I.*\)@\1/libxml2/@g"`]) -dnl -dnl AC_MSG_RESULT([yes; used xml2-config and found version $version_libxml2]) -dnl xml_set=yes -dnl ]) -dnl -dnl # If not found, try pkg-config -dnl AS_IF([test -z "$xml_set"], -dnl [ -dnl libdap_libxml2_module='libxml-2.0 >= 2.7.0' -dnl PKG_CHECK_MODULES([XML2], [libdap_libxml2_module], -dnl [libdap_pkgconfig_libxml2=yes], -dnl [libdap_pkgconfig_libxml2=no]) -dnl AS_IF([test $libdap_pkgconfig_libxml2 = yes], -dnl [ -dnl XML2_LIBS="`$PKG_CONFIG --libs libxml-2.0`" -dnl XML2_CFLAGS="`$PKG_CONFIG --cflags libxml-2.0`" -dnl AC_MSG_RESULT([yes; used pkg-config]) -dnl xmlprivatereq=$libdap_libxml2_module -dnl xml_set=yes -dnl ], -dnl [ -dnl AC_MSG_ERROR([I could not find xml2-config]) -dnl ]) -dnl ]) -dnl -dnl AS_IF([test -z "xml_set"], -dnl [ -dnl AC_MSG_ERROR([I could not find xml2-config]) -dnl ]) -dnl -dnl AC_SUBST([xmlprivatereq]) -dnl AC_SUBST([xmlprivatelibs]) -dnl AC_SUBST([XML2_LIBS]) -dnl AC_SUBST([XML2_CFLAGS]) -dnl -dnl ******** end new, broken-for-the-bes version ********* - -dnl Version of XML2 configuration from git commit 8624abec8e3d510508c3c97ac60082700995af2c -dnl jhrg 8/31/20 - -AC_ARG_WITH(xml2,[ --with-xml2=PFX Prefix where libxml2 is installed (optional). 
This will override pkgconfig, etc.], - with_xml2_prefix="$withval", with_xml2_prefix="") - -xmlprivatereq= -xmlprivatelibs= -xml_set= - -if test -n "$with_xml2_prefix" -a -x $with_xml2_prefix/bin/xml2-config -then - AC_MSG_NOTICE([Using $with_xml2_prefix as the libxml2 prefix directory.]) - XML2_LIBS="`$with_xml2_prefix/bin/xml2-config --libs`" - dnl XML2_STATIC_LIBS=$XML2_LIBS - xmlprivatelibs="`$with_xml2_prefix/bin/xml2-config --libs`" - XML2_CFLAGS="`$with_xml2_prefix/bin/xml2-config --cflags`" - xml_set="yes" -elif test -n "$with_xml2_prefix" -then - AC_MSG_ERROR([You set the libxml2 prefix directory to $with_xml2_prefix, but xml2-config is not there.]) -fi - -if test -z "$xml_set" -then -libdap_pkgconfig_libxml2=yes -libdap_libxml2_module='libxml-2.0 >= 2.7.0' -PKG_CHECK_MODULES([XML2],[$libdap_libxml2_module], ,[libdap_pkgconfig_libxml2=no]) -AC_MSG_CHECKING([for libxml2]) -if test $libdap_pkgconfig_libxml2 = 'yes' -then - xmlprivatereq=$libdap_libxml2_module - dnl XML2_STATIC_LIBS="`$PKG_CONFIG --static --libs libxml-2.0`" - XML2_LIBS="`$PKG_CONFIG --libs libxml-2.0`" - AC_MSG_RESULT([yes; used pkg-config]) -elif xml2-config --version > /dev/null 2>&1 -then - version_libxml2=`xml2-config --version` - - AS_VERSION_COMPARE(["$version_libxml2"], ["2.7.0"], - [AC_MSG_ERROR([I could not find libxml2 2.7.0 or newer])]) - - XML2_LIBS="`xml2-config --libs`" - dnl XML2_STATIC_LIBS=$XML2_LIBS - XML2_CFLAGS="`xml2-config --cflags`" - xmlprivatelibs="`xml2-config --libs `" - dnl ` - AC_MSG_RESULT([yes; used xml2-config and found version $version_libxml2]) -else - AC_MSG_ERROR([I could not find xml2-config]) -fi -fi - -AC_SUBST([xmlprivatereq]) -AC_SUBST([xmlprivatelibs]) -AC_SUBST([XML2_LIBS]) -dnl AC_SUBST([XML2_STATIC_LIBS]) -AC_SUBST([XML2_CFLAGS]) - -dnl End old version of XML2 configuration. jhrg 8/31/20 -dnl ******** +LIBDAP_CHECK_CURL +LIBDAP_CHECK_XML2 dnl Check for the RHEL 8 requirement libtirpc and its headers. 
dnl jhrg 6/23/22 @@ -476,20 +247,20 @@ AC_CHECK_LIB([crypto], [OpenSSL_add_all_algorithms], [CRYPTO_LIBS=""]) AC_SUBST([CRYPTO_LIBS]) -# AM_PATH_CPPUNIT(1.12.0, -# [AM_CONDITIONAL([CPPUNIT], [true])], -# [ -# PKG_CHECK_MODULES(CPPUNIT, [cppunit >= 1.12.0], -# [AM_CONDITIONAL([CPPUNIT], [true])], -# [AM_CONDITIONAL([CPPUNIT], [false])] -# ) -# ] -# ) - -AM_PATH_CPPUNIT(1.12.0, - [AM_CONDITIONAL([CPPUNIT], [true])], - [AM_CONDITIONAL([CPPUNIT], [false])] -) +AC_ARG_ENABLE([cppunit], + [AS_HELP_STRING([--disable-cppunit], [Skip CppUnit detection and disable CppUnit-based tests (default: detect)])], + [], + [enable_cppunit=auto]) + +AS_IF([test "x$enable_cppunit" = xno], + [CPPUNIT_CFLAGS= + CPPUNIT_LIBS= + AM_CONDITIONAL([CPPUNIT], [false])], + [AM_PATH_CPPUNIT(1.12.0, + [AM_CONDITIONAL([CPPUNIT], [true])], + [AM_CONDITIONAL([CPPUNIT], [false])])]) +AC_SUBST([CPPUNIT_CFLAGS]) +AC_SUBST([CPPUNIT_LIBS]) DODS_DEBUG_OPTION @@ -503,8 +274,10 @@ AC_ARG_ENABLE([asan], dnl Removed -fsanitize=undefined. jhrg 2/21/25 -ASAN_OPTIONS="-fsanitize=address -fno-omit-frame-pointer" -CXX_FLAGS_CHECK([$ASAN_OPTIONS], [has_asan=yes], [has_asan=no]) +has_asan=no +AS_IF([test x$enable_asan = xyes], + [ASAN_OPTIONS="-fsanitize=address -fno-omit-frame-pointer" + CXX_FLAGS_CHECK([$ASAN_OPTIONS], [has_asan=yes], [has_asan=no])]) AS_IF([test x$enable_asan = xyes -a x$has_asan = xyes], [AC_MSG_NOTICE([Building Address Sanitizer version]) @@ -525,9 +298,9 @@ AS_IF([test x$enable_batest = xyes ], AC_ARG_ENABLE([leaks], [AS_HELP_STRING([--enable-leaks], [Run unit tests on OSX using the 'leaks' if available (default: no)])]) -# The 'leaks' tool on OSX can be used to test if a program leaks memory. 
-# Look for 'leaks' and set LEAKS to it if found, else set it to 'no' -AC_CHECK_PROG(LEAKS, [leaks], [leaks], [no]) +LEAKS=no +AS_IF([test x$enable_leaks = xyes], + [AC_CHECK_PROG(LEAKS, [leaks], [leaks], [no])]) AS_IF([test x$enable_leaks = xyes -a x$LEAKS != xno], [AC_MSG_NOTICE([Will run unit-tests using leaks]) @@ -553,18 +326,20 @@ AS_IF([test x$enable_developer = xyes], AC_ARG_ENABLE([coverage], [AS_HELP_STRING([--enable-coverage], [Build so tests emit coverage data and enable coverage target (default: no)])]) -AC_CHECK_LIB([gcov], [gcov_open], [GCOV_LIBS="-lgcov"], [GCOV_LIBS=]) - -AS_IF([test x$enable_coverage = xyes && which gcov], - [AC_MSG_NOTICE([Building coverage version]) - AM_CONDITIONAL([ENABLE_COVERAGE], [true]) - AS_IF([gcov -help | grep LLVM], - [GCOVR_FLAGS=], - [GCOVR_FLAGS="-k -e '.*Test.cc' -e '.*T.cc' -e '.*-test.cc'" - LIBS="-lgcov $LIBS"])], +GCOVR_FLAGS= +AS_IF([test x$enable_coverage = xyes], + [AC_PATH_PROG([GCOV], [gcov], [no]) + AS_IF([test "x$GCOV" != xno], + [AC_MSG_NOTICE([Building coverage version]) + AM_CONDITIONAL([ENABLE_COVERAGE], [true]) + AS_IF([$GCOV -help 2>&1 | grep -q LLVM], + [GCOVR_FLAGS=], + [GCOVR_FLAGS="-k -e '.*Test.cc' -e '.*T.cc' -e '.*-test.cc'" + LIBS="-lgcov $LIBS"])], + [AC_MSG_NOTICE([Not building coverage version]) + AC_MSG_NOTICE([Check that gcov is on your PATH]) + AM_CONDITIONAL([ENABLE_COVERAGE], [false])])], [AC_MSG_NOTICE([Not building coverage version]) - AS_IF([test x$enable_coverage = xyes], - [AC_MSG_NOTICE([Check that gcov is on your PATH])]) AM_CONDITIONAL([ENABLE_COVERAGE], [false])]) AC_SUBST([GCOVR_FLAGS]) diff --git a/docs/2007_10_10_Gallagher et al_DAP 2.0_ESE-RFC-004v1.2.pdf b/docs/2007_10_10_Gallagher et al_DAP 2.0_ESE-RFC-004v1.2.pdf new file mode 100644 index 000000000..1b161c05c Binary files /dev/null and b/docs/2007_10_10_Gallagher et al_DAP 2.0_ESE-RFC-004v1.2.pdf differ diff --git a/docs/AGENTS.md b/docs/AGENTS.md new file mode 100644 index 000000000..43c546363 --- /dev/null +++ 
b/docs/AGENTS.md @@ -0,0 +1,109 @@ +# AGENTS.md + +## Scope + +These instructions apply to the entire `libdap4` repository. + +## Project Context + +- `libdap4` is a legacy C++ implementation of DAP2/DAP4 with long-lived downstream consumers. +- Prioritize compatibility, behavioral stability, and small, reviewable diffs. +- Prefer minimal, targeted changes over broad refactors. + +## Primary Build Systems + +- Prefer autotools for day-to-day work unless the task is explicitly CMake-focused. +- Keep both autotools and CMake build paths healthy when changing shared build logic. + +## Autotools Workflow (preferred) + +For a fresh git checkout: + +```sh +autoreconf --force --install --verbose +./configure --prefix=$prefix --enable-developer +make -j +make -j check +``` + +For release-tarball style builds: + +```sh +./configure +make -j +make -j check +``` + +Notes: + +- Check that the environment variable 'prefix' is defined before running any command that uses it. +- Use `--prefix=` when installation path matters. +- Use `TESTSUITEFLAGS=-j` with `make check` when parallelizing tests. +- If `make check` fails due to missing `config.guess`, link `conf/config.guess` into `tests/` per `tests/README`. + +## CMake Workflow (supported) + +- Presets are defined in `CMakePresets.json`. +- Common presets: `default`, `debug`, `developer`, `asan`. + +Typical flow: + +```sh +cmake --preset developer +cmake --build --preset developer -j +ctest --preset developer --output-on-failure +``` + +## Testing Expectations + +- For code changes, run focused tests in affected areas first, then broader suites when risk is higher. +- Autotools default: `make -j check` +- CMake default: `ctest --preset default` (or `developer` for debug/developer builds) +- Unit/integration labels are available through CMake test presets (`unit`, `int`). +- If tests are flaky or expected-fail in legacy areas, call that out explicitly in your summary. 
+ +## Documentation And Doxygen + +- Doxygen docs are built with: + +```sh +make docs +``` + +- Inputs are `doxy.conf` and `main_page.doxygen` (generated from `.in` templates by configure). +- When updating doc config/templates, keep generated and template files consistent with the chosen build workflow. + +## Legacy C++ Constraints + +- Match local style in touched files; do not perform unrelated formatting sweeps. +- Avoid API/ABI-impacting changes unless explicitly requested. +- Be conservative with ownership/lifetime changes in pointer-heavy code. +- Parser/scanner sources are generated (`*.tab.cc`, `*.tab.hh`, `lex.*.cc`); edit `*.yy`/`*.lex` sources, not generated outputs, unless the task explicitly requires generated-file updates. + +## Tooling And Quality + +- `clang-format` and pre-commit are configured (`README.pre-commit.md`, `.pre-commit-config.yaml`). +- Prefer running formatting/hooks only on changed files relevant to the task. +- Address sanitizer is supported (`--enable-asan` in autotools, `asan` preset in CMake) for memory-safety debugging. + +## Change Discipline + +- Do not revert unrelated local changes in a dirty worktree. +- Keep edits tightly scoped to the request. +- If you encounter unexpected repository changes during work, stop and ask how to proceed. +- Do not run destructive git commands unless explicitly requested. + +## Review Priorities + +When asked to review: + +1. Behavioral regressions in protocol/data-model behavior +2. Memory/resource safety and ownership lifetime issues +3. Parser/serialization correctness and edge cases +4. Build-system regressions (autotools and CMake) +5. Missing or weak regression coverage + +## Communication + +- State assumptions and environment details explicitly (build system, preset/configure flags, test scope). +- If full validation is not run, say exactly what was run and what was not. 
diff --git a/docs/configure-ac-refactor-plan.md b/docs/configure-ac-refactor-plan.md new file mode 100644 index 000000000..a7b2bf3c3 --- /dev/null +++ b/docs/configure-ac-refactor-plan.md @@ -0,0 +1,276 @@ +# `configure.ac` Refactoring Plan + +## Goal + +Reduce `./configure` runtime first, and make `configure.ac` easier to maintain second, by removing probes that no longer affect compilation, linking, tests, or generated build files. + +## Current observations + +The current `configure.ac` mixes four different kinds of logic: + +1. Required dependency discovery that still feeds `Makefile.am`, `dap-config`, or `libdap.pc`. +2. Optional build/test toggles that still drive Automake conditionals. +3. Legacy portability probes that define `HAVE_*` symbols no longer consumed by the code. +4. Historical commented-out blocks and shell-heavy custom logic that increase maintenance cost. + +A quick symbol audit against the tree shows several header and function checks now appear unused outside generated files: + +### Header probes with no current consumer + +- `HAVE_GETOPT_H` +- `HAVE_LIMITS_H` +- `HAVE_FCNTL_H` +- `HAVE_MEMORY_H` +- `HAVE_STDDEF_H` +- `HAVE_NETINET_IN_H` + +### Function probes with no current consumer + +- `HAVE_ALARM` +- `HAVE_BZERO` +- `HAVE_GETCWD` +- `HAVE_LOCALTIME_R` +- `HAVE_MEMMOVE` +- `HAVE_MEMSET` +- `HAVE_POW` +- `HAVE_PUTENV` +- `HAVE_STRCHR` +- `HAVE_STRERROR` +- `HAVE_STRTOL` +- `HAVE_STRTOUL` + +There are also legacy/commented sections that add noise without affecting the generated build: + +- The old DAP2/DDX block near the top. +- A large commented-out alternate libxml2 detection implementation. +- Commented-out CppUnit detection alternatives. + +## Refactoring strategy + +Apply the work in phases so behavior stays stable while the script gets smaller and faster. 
+ +## Phase 1: Build an evidence-backed inventory + +Before deleting checks, produce a short inventory that classifies every probe in `configure.ac` as: + +- `required`: affects compile flags, link flags, generated scripts, or test baselines +- `optional`: controls developer/test-only behavior +- `dead`: no current consumer in source or Automake input +- `replace`: still needed, but should use a simpler macro or less shell code + +This inventory should include: + +- `AC_CHECK_HEADERS*` +- `AC_CHECK_FUNCS` +- `AC_CHECK_LIB` +- `PKG_CHECK_MODULES` +- `AM_CONDITIONAL` +- custom macros such as `DODS_CHECK_SIZES`, `OX_RHEL8_TIRPC`, and `DODS_DEBUG_OPTION` + +Deliverable: a checked-in table or comment block that makes future cleanup decisions auditable. + +## Phase 2: Remove dead portability probes first + +This is the safest runtime win. + +### Remove unused header checks + +Delete header probes whose `HAVE_*` symbols have no current non-generated consumer, starting with: + +- `getopt.h` +- `limits.h` +- `fcntl.h` +- `memory.h` +- `stddef.h` +- `netinet/in.h` + +Then review low-value probes with only one or two consumers and replace them with unconditional standard includes where practical: + +- `malloc.h` +- `sys/time.h` +- `sys/param.h` +- `locale.h` + +For each remaining conditional include, decide whether the project still cares about the target platforms that required it. If not, remove both the probe and the `#ifdef HAVE_...` branches in code. + +### Remove unused function checks + +Delete unused entries from the large `AC_CHECK_FUNCS([...])` list. Keep only functions whose `HAVE_*` symbols still affect source behavior, such as: + +- `atexit` +- `dup2` +- `getpagesize` +- `setenv` +- `timegm` +- `mktime` + +If some of those can now be assumed on supported platforms, remove their probes too and simplify the code accordingly. + +Expected result: fewer compile/link test fragments generated and executed by `configure`, with minimal behavior risk. 
+ +## Phase 3: Replace obsolete Autoconf portability macros + +Several generic portability macros are legacy baggage for a modern C++14 library. + +Review and likely remove or justify: + +- `AC_HEADER_DIRENT` +- `AC_HEADER_SYS_WAIT` +- `AC_C_CONST` +- `AC_C_INLINE` +- `AC_TYPE_SIZE_T` +- `AC_STRUCT_TM` +- `AC_C_VOLATILE` +- `AC_CHECK_MEMBERS([struct stat.st_blksize])` + +These should stay only if one of the following is true: + +- a generated symbol is still used in the code, or +- the project explicitly supports platforms old enough to require the probe + +If there is no such support requirement, remove them. For a C++14 codebase, many of these are unnecessary. + +## Phase 4: Simplify dependency detection + +Most remaining configure time is likely spent in external dependency checks, compiler/link tests, and shelling out to helper tools. + +### Curl and libxml2 + +The current curl and libxml2 blocks duplicate logic across: + +- explicit `--with-...` prefixes +- `pkg-config` +- `*-config` scripts + +Refactor each dependency probe into one small macro with this order: + +1. honor explicit `--with-...` prefix +2. try `PKG_CHECK_MODULES` +3. fall back to `curl-config` or `xml2-config` only if required for platforms still in scope + +Also: + +- move each dependency block into `conf/*.m4` or a local helper macro +- delete the large commented-out alternate libxml2 implementation +- keep only the variables actually consumed by the build (`*_LIBS`, `*_CFLAGS`, private pkg-config fields) + +If the supported platforms all provide `pkg-config`, the biggest runtime and maintenance win is to make `pkg-config` the only non-prefix path and drop `curl-config`/`xml2-config` fallback logic entirely. + +### Library checks + +Review `AC_CHECK_LIB` use for: + +- `pthread` +- `uuid` +- `crypto` +- `gcov` + +If these libraries are already discovered transitively through pkg-config or are guaranteed on supported platforms, avoid redundant link probes. 
In particular: + +- `pthread` may be better handled with standard thread detection macros instead of a raw `-lpthread` check. +- `gcov` should be checked only when `--enable-coverage` is requested, not on every run. + +## Phase 5: Make optional developer/test features lazy + +Some probes should only run when the related feature is requested. + +### Coverage + +Move all coverage detection behind `--enable-coverage=yes`: + +- `AC_CHECK_LIB([gcov], ...)` +- `which gcov` +- `gcov -help | grep LLVM` + +No coverage-related work should happen in the default configure path. + +### Leaks + +Only call `AC_CHECK_PROG(LEAKS, ...)` when `--enable-leaks=yes` is requested. On non-macOS builders this is pure overhead. + +### CppUnit + +CppUnit detection should run only if unit tests that require it will be built. If test builds are optional, gate `AM_PATH_CPPUNIT` behind an explicit option such as `--enable-cppunit-tests` or equivalent test toggle. + +### AddressSanitizer + +The ASan compiler flag probe is reasonable, but it should only run when `--enable-asan=yes` is requested. + +Expected result: the common `./configure` path avoids feature checks for coverage, leaks, ASan, and optional test frameworks. + +## Phase 6: Replace shell-heavy parsing with simpler M4/shell patterns + +The version parsing and some helper checks currently use repeated `grep`/`sed` pipelines. They are not the main runtime cost, but they do make the script harder to maintain. + +Refactor to: + +- parse version components once +- avoid repeated external `sed` calls where shell parameter expansion or a single helper macro is enough +- consolidate repeated `if test -n "$prefix" -a -x ...` patterns into helper macros + +Also clean up: + +- commented historical notes that no longer guide current behavior +- duplicated or stale comments about removed build paths + +## Phase 7: Reorganize the file for maintainability + +After functional cleanup, split `configure.ac` into clearer sections: + +1. 
package/version setup +2. toolchain setup +3. core compile environment +4. required dependencies +5. optional developer/test features +6. generated files + +Move reusable dependency logic into local `.m4` macros under `conf/` so `configure.ac` becomes mostly orchestration rather than embedded shell script. + +Recommended first extractions: + +- `LIBDAP_CHECK_CURL` +- `LIBDAP_CHECK_XML2` +- `LIBDAP_CHECK_OPTIONAL_TEST_TOOLS` + +## Suggested implementation order + +1. Remove dead `AC_CHECK_HEADERS*` entries. +2. Remove dead `AC_CHECK_FUNCS` entries. +3. Gate coverage, leaks, ASan, and CppUnit detection behind explicit enable options. +4. Delete commented-out legacy blocks. +5. Simplify curl and libxml2 detection. +6. Review/remove obsolete portability macros. +7. Extract the remaining dependency logic into local `.m4` helpers. + +This order gives early runtime improvements before the larger structural cleanup. + +## Validation plan + +For each cleanup step, verify both generated configuration and real builds. + +Minimum validation matrix: + +- `autoreconf -fi` +- default `./configure` +- `./configure --enable-developer` +- `./configure --enable-asan` if supported by the compiler +- `./configure --enable-coverage` +- one build with explicit `--with-curl=...` or `--with-xml2=...` if those paths are retained + +Then run: + +- a normal library build +- at least one unit-test build +- the Autotest suite in `tests/` + +Also measure configure runtime before and after each major phase so the cleanup stays aligned with the primary goal. 
+ +## Success criteria + +The refactor is complete when: + +- every remaining probe has a documented consumer +- default `./configure` runs fewer external checks than today +- optional developer/test probes are skipped unless explicitly requested +- `configure.ac` no longer contains large commented-out alternative implementations +- dependency detection logic is short enough that a maintainer can reason about it without reading historical branches diff --git a/docs/deep-dive-codex.md b/docs/deep-dive-codex.md new file mode 100644 index 000000000..1ee0bd728 --- /dev/null +++ b/docs/deep-dive-codex.md @@ -0,0 +1,57 @@ +**Deep Dive Summary** + +- This repo is a mature C++ DAP stack with clear layering: core protocol/data model (`libdap`), HTTP/client transport (`libdapclient`), and server filter helpers (`libdapserver`). +- It implements both DAP2 and DAP4 in one codebase, with shared type infrastructure and protocol-specific parsers/serializers. +- The implementation is production-oriented and heavily tested, but there are a few explicit DAP4 “not yet implemented” paths you should treat as known gaps. + +**Library Boundaries** + +- `libdap` is the core and includes DAP2 + DAP4 model, parsers, marshalling, CE logic, and common utilities: [CMakeLists.txt](/Users/jimg/src/opendap/hyrax_git/libdap4/CMakeLists.txt#L275), [Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/Makefile.am#L64), [libdap.pc.in](/Users/jimg/src/opendap/hyrax_git/libdap4/libdap.pc.in). +- `libdapclient` adds client-facing connect/config/HTTP layers: [CMakeLists.txt](/Users/jimg/src/opendap/hyrax_git/libdap4/CMakeLists.txt#L299), [Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/Makefile.am#L71), [libdapclient.pc.in](/Users/jimg/src/opendap/hyrax_git/libdap4/libdapclient.pc.in). 
+- `libdapserver` contains server filter primitives for CGI-style handlers: [CMakeLists.txt](/Users/jimg/src/opendap/hyrax_git/libdap4/CMakeLists.txt#L324), [Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/Makefile.am#L76), [libdapserver.pc.in](/Users/jimg/src/opendap/hyrax_git/libdap4/libdapserver.pc.in). + +**DAP2 Implementation Path** + +- DAP2 client workflow centers on `Connect`: request DAS/DDS/DDX/DataDDS, parse MIME metadata, parse DDS text, then XDR-unmarshal data: [Connect.h](/Users/jimg/src/opendap/hyrax_git/libdap4/Connect.h#L127), [Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/Connect.cc#L71), [Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/Connect.cc#L250). +- DAP2 CE selection/function flow is parser-driven (`ce_expr.yy/.lex`) into `ConstraintEvaluator` clauses: [ConstraintEvaluator.h](/Users/jimg/src/opendap/hyrax_git/libdap4/ConstraintEvaluator.h#L41), [ConstraintEvaluator.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/ConstraintEvaluator.cc#L333). +- Server-side response shaping is `DODSFilter` + CE + marshalling: [DODSFilter.h](/Users/jimg/src/opendap/hyrax_git/libdap4/DODSFilter.h#L174), [DODSFilter.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/DODSFilter.cc#L89). + +**DAP4 Implementation Path** + +- DAP4 client workflow is `D4Connect`: build DAP4 query keys (`dap4.ce`, `dap4.checksum`), fetch `.dmr` or `.dap`, parse DMR first chunk, then stream-unmarshal payload: [D4Connect.h](/Users/jimg/src/opendap/hyrax_git/libdap4/D4Connect.h#L54), [D4Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4Connect.cc#L359), [D4Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4Connect.cc#L374). +- DMR is the DAP4 root object and supports DAP2↔DAP4 transforms (`build_using_dds`, `getDDS`): [DMR.h](/Users/jimg/src/opendap/hyrax_git/libdap4/DMR.h#L48), [DMR.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/DMR.cc#L156), [DMR.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/DMR.cc#L202). 
+- DMR parsing uses libxml2 SAX with strict/permissive map handling mode: [D4ParserSax2.h](/Users/jimg/src/opendap/hyrax_git/libdap4/D4ParserSax2.h#L75), [D4ParserSax2.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4ParserSax2.cc#L310). + +**Data Model Core** + +- `BaseType` is the type-invariant root used across DAP2 and DAP4; it tracks projection state, attrs, parentage, transform hooks: [BaseType.h](/Users/jimg/src/opendap/hyrax_git/libdap4/BaseType.h#L118). +- Type enum includes classic DAP2 and DAP4 additions (`Int64`, `UInt64`, `Enum`, `Opaque`, `Group`): [Type.h](/Users/jimg/src/opendap/hyrax_git/libdap4/Type.h#L94). + +**Transport and I/O** + +- HTTP transport is libcurl-based `HTTPConnect`, with DAP header parsing, optional cache integration, cookies/proxy/no_proxy, and C++ stream mode for DAP4: [HTTPConnect.h](/Users/jimg/src/opendap/hyrax_git/libdap4/http_dap/HTTPConnect.h#L52), [HTTPConnect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/http_dap/HTTPConnect.cc#L557). +- DAP4 payload transfer uses chunked framing and receiver-makes-right byte handling: [chunked_stream.h](/Users/jimg/src/opendap/hyrax_git/libdap4/chunked_stream.h#L11), [chunked_istream.h](/Users/jimg/src/opendap/hyrax_git/libdap4/chunked_istream.h#L42), [chunked_istream.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/chunked_istream.cc#L94). + +**Serialization** + +- DAP2 serialization path uses XDR stream/file marshallers: [XDRStreamMarshaller.h](/Users/jimg/src/opendap/hyrax_git/libdap4/XDRStreamMarshaller.h#L53). +- DAP4 serialization path uses `D4StreamMarshaller` with optional checksums and stream-first encoding: [D4StreamMarshaller.h](/Users/jimg/src/opendap/hyrax_git/libdap4/D4StreamMarshaller.h#L58), [D4StreamMarshaller.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4StreamMarshaller.cc#L118). 
+ +**Constraint/Function Engines** + +- DAP4 CE parser/evaluator (`d4_ce`) supports array slicing, shared-dim behavior, map pruning, and filter clauses; includes XSS-aware error redaction: [D4ConstraintEvaluator.h](/Users/jimg/src/opendap/hyrax_git/libdap4/d4_ce/D4ConstraintEvaluator.h#L48), [D4ConstraintEvaluator.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/d4_ce/D4ConstraintEvaluator.cc#L42), [D4ConstraintEvaluator.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/d4_ce/D4ConstraintEvaluator.cc#L181). +- Server function registry is a singleton (`ServerFunctionsList`) used by DAP2 and DAP4 function paths: [ServerFunctionsList.h](/Users/jimg/src/opendap/hyrax_git/libdap4/ServerFunctionsList.h#L46), [ServerFunctionsList.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/ServerFunctionsList.cc#L65), [D4FunctionEvaluator.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/d4_function/D4FunctionEvaluator.cc#L104). + +**Tooling and Quality** + +- Scale: ~207 `.cc`, ~234 `.h`, 7 parser grammars, 7 lexer files. +- Strong testing surface: autotest integration suites and ~60 CppUnit-style unit tests, including DAP4 round-trips and DMR↔DAP2 translation: [tests/Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/tests/Makefile.am#L27), [unit-tests/Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/unit-tests/Makefile.am#L83). +- Pre-commit and CI hooks are in place for formatting/lint sanity: [pre-commit.yml](/Users/jimg/src/opendap/hyrax_git/libdap4/.github/workflows/pre-commit.yml#L1), [.pre-commit-config.yaml](/Users/jimg/src/opendap/hyrax_git/libdap4/.pre-commit-config.yaml#L1). + +**Notable Gaps/Risks** + +- DAP4 error-body handling is explicitly unfinished in client paths (`FIXME`/throw): [D4Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4Connect.cc#L131), [D4Connect.cc](/Users/jimg/src/opendap/hyrax_git/libdap4/D4Connect.cc#L404). 
+- Some legacy/deprecated config/proxy paths remain and increase maintenance surface: [RCReader.h](/Users/jimg/src/opendap/hyrax_git/libdap4/RCReader.h#L93), [README.dodsrc](/Users/jimg/src/opendap/hyrax_git/libdap4/README.dodsrc). +- Mixed old/new build systems (Autotools + CMake) are both supported; good for portability, but raises drift risk: [Makefile.am](/Users/jimg/src/opendap/hyrax_git/libdap4/Makefile.am#L1), [CMakeLists.txt](/Users/jimg/src/opendap/hyrax_git/libdap4/CMakeLists.txt#L1). + +If you want, I can do a second-pass deep dive focused only on one of these: `libdap` internals, `libdapclient` transport/cache behavior, or `libdapserver` handler integration patterns. diff --git a/gtest-migration-plan.md b/gtest-migration-plan.md new file mode 100644 index 000000000..c86020254 --- /dev/null +++ b/gtest-migration-plan.md @@ -0,0 +1,313 @@ +# GoogleTest Migration Plan For Autotools Unit Tests + +## Scope + +This plan covers only the CppUnit-based tests that are built and run through the autotools `make check` path. + +Included: + +- `unit-tests/Makefile.am` +- `http_dap/unit-tests/Makefile.am` +- `d4_ce/unit-tests/Makefile.am` + +Excluded: + +- CMake test wiring +- The autotest integration suites in `tests/Makefile.am`, except for shared test support such as `libtest-types.a` + +The current autotools CppUnit surface is 63 test binaries: + +- 59 in `unit-tests/` +- 3 in `http_dap/unit-tests/` +- 1 in `d4_ce/unit-tests/` + +This plan uses `AGENTS.md` guidance and the architecture/build notes in `docs/deep-dive-codex.md`. + +## Goals + +- Replace CppUnit with GoogleTest for the autotools unit-test executables +- Preserve `make check` behavior for autotools users +- Keep the existing test executable names stable during migration +- Avoid changing the autotest integration suites unless needed for shared support code +- Minimize risk by converting low-coupling tests first and network-sensitive tests last + +## Build-System Plan + +1. 
Replace the configure-time dependency check in `configure.ac`. + Remove the `AM_PATH_CPPUNIT(...)` dependency gate and introduce a GoogleTest detection path with a new automake conditional such as `GTEST`. + +2. Add GoogleTest build variables to the autotools test directories. + In `unit-tests/Makefile.am`, `http_dap/unit-tests/Makefile.am`, and `d4_ce/unit-tests/Makefile.am`, replace `$(CPPUNIT_CFLAGS)` and `$(CPPUNIT_LIBS)` with GoogleTest equivalents such as `$(GTEST_CFLAGS)`, `$(GTEST_LIBS)`, and `$(PTHREAD_LIBS)`. + +3. Keep the autotools execution model stable. + Preserve `check_PROGRAMS = $(UNIT_TESTS)` and `TESTS = $(UNIT_TESTS)` so `make check` still builds and runs the same executables. + +4. Introduce a shared GoogleTest runner helper. + Replace `run_tests_cppunit.h` with a GoogleTest-compatible helper that preserves: + - `-d` / `-D` debug flag handling + - basic help behavior + - optional single-test selection from the command line + +5. Keep dual-framework support during the transition. + Do not remove CppUnit support until all autotools test binaries have been migrated and verified. This avoids turning the work into a single high-risk cutover. + +6. Remove CppUnit only after parity is reached. 
+ After all autotools tests pass under GoogleTest, remove: + - CppUnit configure checks + - CppUnit conditionals in autotools files + - CppUnit headers and helper files + - CppUnit references in build/install documentation + +## Test Conversion Rules + +Use one consistent conversion pattern across the tree: + +- `CPPUNIT_TEST_SUITE` to `TEST_F` +- `CPPUNIT_ASSERT(expr)` to `EXPECT_TRUE(expr)` or `ASSERT_TRUE(expr)` +- `CPPUNIT_ASSERT_EQUAL(a, b)` to `EXPECT_EQ(a, b)` +- `CPPUNIT_ASSERT_THROW(expr, Ex)` to `EXPECT_THROW(expr, Ex)` +- `CPPUNIT_FAIL(msg)` to `FAIL() << msg` +- `setUp()` / `tearDown()` to `SetUp()` / `TearDown()` + +Preserve test logic during the framework switch: + +- keep fixture setup and cleanup behavior unchanged +- keep environment-variable setup unchanged +- keep test asset paths and generated `test_config.h` usage unchanged +- do not rename executables in the first migration pass + +## Recommended Batches + +The batches are ordered to establish the GoogleTest pattern on low-risk tests first, then move outward toward parser, translation, cache, and HTTP/network-sensitive areas. 
+ +### Batch 0: Build Skeleton And Proof Of Pattern + +Purpose: + +- land the autotools GoogleTest dependency wiring +- add the shared runner/helper +- prove the conversion style in one test per subtree + +Tests: + +- `unit-tests/BaseTypeTest` +- `d4_ce/unit-tests/D4ConstraintEvaluatorTest` +- `http_dap/unit-tests/HTTPConnectTest` + +Exit criteria: + +- `autoreconf -fi` +- `./configure` +- each converted binary builds and runs under `make check` + +### Batch 1: Scalar And Utility Core Tests + +Purpose: + +- convert low-coupling tests with straightforward fixtures and assertions +- validate the assertion mapping and runner helper + +Tests: + +- `unit-tests/RegexTest` +- `unit-tests/ByteTest` +- `unit-tests/MIMEUtilTest` +- `unit-tests/generalUtilTest` +- `unit-tests/parserUtilTest` +- `unit-tests/ErrorTest` +- `unit-tests/SignalHandlerTest` +- `unit-tests/Int8Test` +- `unit-tests/Int16Test` +- `unit-tests/UInt16Test` +- `unit-tests/Int32Test` +- `unit-tests/UInt32Test` +- `unit-tests/Int64Test` +- `unit-tests/UInt64Test` +- `unit-tests/Float32Test` +- `unit-tests/Float64Test` + +Why this batch: + +- mostly local assertions +- little shared state +- low filesystem and network sensitivity + +### Batch 2: Core DAP2 Model And Container Tests + +Purpose: + +- convert the core object-model tests that exercise the library boundaries described in `docs/deep-dive-codex.md` + +Tests: + +- `unit-tests/ArrayTest` +- `unit-tests/GridTest` +- `unit-tests/AttrTableTest` +- `unit-tests/DASTest` +- `unit-tests/DDSTest` +- `unit-tests/SequenceTest` +- `unit-tests/BaseTypeFactoryTest` +- `unit-tests/D4BaseTypeFactoryTest` +- `unit-tests/ConstraintEvaluatorTest` +- `unit-tests/ServerFunctionsListUnitTest` +- `unit-tests/BaseTypeTest` + +Why this batch: + +- still mostly in-process +- builds confidence in fixture conversion before older legacy-style tests are touched + +### Batch 3: Legacy CppUnit Pattern Tests + +Purpose: + +- convert the older tests that still use the historic `*T` 
naming and older fixture style + +Tests: + +- `unit-tests/marshT` +- `unit-tests/arrayT` +- `unit-tests/attrTableT` +- `unit-tests/structT` +- `unit-tests/sequenceT` +- `unit-tests/ddsT` +- `unit-tests/dasT` +- `unit-tests/ancT` +- `unit-tests/util_mitTest` + +Why this batch: + +- these are likely to require the most mechanical cleanup +- separating them avoids slowing down the cleaner modern conversions + +### Batch 4: Parser, XML, And Translation Tests + +Purpose: + +- migrate the tests that depend on XML parsing, generated config, and DAP2/DAP4 translation paths + +Tests: + +- `unit-tests/DDXParserTest` +- `unit-tests/D4ParserSax2Test` +- `unit-tests/DMRTest` +- `unit-tests/DmrRoundTripTest` +- `unit-tests/DmrToDap2Test` +- `unit-tests/IsDap4ProjectedTest` +- `unit-tests/D4AttributesTest` +- `unit-tests/D4DimensionsTest` +- `unit-tests/D4EnumDefsTest` +- `unit-tests/D4EnumTest` +- `unit-tests/D4GroupTest` +- `unit-tests/D4SequenceTest` +- `unit-tests/D4FilterClauseTest` +- `unit-tests/D4AsyncDocTest` + +Why this batch: + +- these tests are tightly tied to the DAP4 parsing and translation flows +- they are a good midpoint between simple unit tests and stream/cache tests + +### Batch 5: Marshaller, Stream, And Concurrency Tests + +Purpose: + +- convert tests that exercise serialization, streaming, and threading behavior + +Tests: + +- `unit-tests/MarshallerTest` +- `unit-tests/MarshallerFutureTest` +- `unit-tests/MarshallerThreadTest` +- `unit-tests/D4MarshallerTest` +- `unit-tests/D4UnMarshallerTest` +- `unit-tests/D4StreamRoundTripTest` +- `unit-tests/chunked_iostream_test` + +Why this batch: + +- more sensitive to fatal vs non-fatal assertions +- often easier to debug after the general fixture strategy is already proven + +### Batch 6: Cache And Local Filesystem Tests + +Purpose: + +- convert tests that create, clean, or inspect local cache state and generated files + +Tests: + +- `unit-tests/RCReaderTest` +- `unit-tests/DAPCache3Test` +- 
`unit-tests/ResponseCacheTest` if re-enabled for autotools +- `http_dap/unit-tests/HTTPCacheTest` + +Why this batch: + +- fixture cleanup matters +- stale temp files and cache locks can hide migration bugs + +### Batch 7: HTTP And External-Environment Tests + +Purpose: + +- convert the most environment-sensitive tests last + +Tests: + +- `http_dap/unit-tests/HTTPConnectTest` +- `http_dap/unit-tests/HTTPThreadsConnectTest` + +Why this batch: + +- network behavior and remote state can obscure framework migration issues +- these should be used only after the helper, fixture, and assertion patterns are stable + +### Batch 8: Optional Resource-Heavy Test + +Purpose: + +- convert the largest and least convenient test only after the main migration is complete + +Tests: + +- `unit-tests/BigArrayTest` + +Why this batch: + +- already optional under autotools +- high runtime and resource cost +- poor candidate for early validation + +## Batch Notes + +- `HTTPConnectTest` appears in Batch 0 as a proof case and again in Batch 7 as part of the full HTTP sweep. Treat Batch 0 as the first pilot conversion for that subtree, then complete the remaining HTTP tests together. +- `ResponseCacheTest` is present in the source tree but not listed in `unit-tests/Makefile.am` for `UNIT_TESTS`. If it is intentionally dormant, do not expand scope during the migration. If it is meant to be restored, move it into Batch 6. +- Keep `test_config.h`, `testFile.cc`, `remove_directory.cc`, and `../tests/libtest-types.a` untouched unless the GoogleTest transition forces a narrowly scoped change. + +## Verification Plan + +After each batch: + +1. Run `autoreconf -fi` +2. Run `./configure` +3. Run the relevant subtree target: + - top-level `make check` + - or focused checks in `unit-tests/`, `http_dap/unit-tests/`, and `d4_ce/unit-tests/` +4. Re-run the converted binaries individually when debugging fixture behavior + +Before removing CppUnit entirely: + +1. Run a full top-level `make check` +2. 
Run at least one developer-style build variant if that is part of normal project use +3. Confirm that the autotest integration suites in `tests/` still build and run unchanged + +## Completion Criteria + +The migration is complete when: + +- all autotools unit-test executables build against GoogleTest +- `make check` succeeds through the autotools path +- CppUnit is no longer referenced by `configure.ac` or the autotools unit-test `Makefile.am` files +- the shared runner helper has been replaced with the GoogleTest version +- build and install documentation no longer claim that CppUnit is required for autotools unit tests diff --git a/windsurf-harnett/rules/doc-check.md b/windsurf-harnett/rules/doc-check.md new file mode 100644 index 000000000..f79a9c292 --- /dev/null +++ b/windsurf-harnett/rules/doc-check.md @@ -0,0 +1,15 @@ +--- +trigger: always_on +--- + +# Documentation Check Rule + +Before planning any code changes: + +1. **Review Architecture & Design**: Check [docs/design.md](../../../docs/design.md) for system architecture, component interactions, and technical specifications. +2. **Verify Requirements**: Consult [docs/prd.md](../../../docs/prd.md) to ensure changes align with product requirements, API specifications, and feature definitions. +3. **Understand User Impact**: Read [docs/prfaq.md](../../../docs/prfaq.md) to consider how changes affect users, compatibility, and use cases. +4. **Check Version Compatibility**: Verify that changes maintain backward compatibility as specified in the documentation. +5. **Consider Format Support**: For changes affecting file formats (NetCDF, CDF, GeoTIFF), ensure compliance with format specifications in the documentation. +6. **Review Build Systems**: For build system changes, ensure both CMake and Autotools configurations are updated consistently. +7. **Update Documentation**: Plan to update relevant documentation if implementing new features or changing existing behavior. 
diff --git a/windsurf-harnett/rules/local-build-command.md b/windsurf-harnett/rules/local-build-command.md new file mode 100644 index 000000000..04c8f7c4f --- /dev/null +++ b/windsurf-harnett/rules/local-build-command.md @@ -0,0 +1,78 @@ +--- +trigger: model_decision +--- + +# Local Build Commands for NEP + +## When to Use This Rule + +Use these paths for **local development builds on Ed's machine**. +For CI/GitHub Actions, different paths are used (see `.github/workflows/`). + +## Machine-Specific Dependency Paths + +- **HDF5**: `/usr/local/hdf5-2.0.0/` +- **NetCDF-C**: `/usr/local/netcdf-c-4.10.0/` +- **NetCDF-Fortran**: `/usr/local/netcdf-fortran/` (if Fortran enabled) +- **CDF**: `/usr/local/cdf-3.9.1/` (if CDF enabled) +- **GeoTIFF**: System packages (`libgeotiff-dev`, `libtiff-dev`) + +## Runtime Environment + +Before running tests or executables: + +```bash +export LD_LIBRARY_PATH=/usr/local/hdf5-2.0.0/lib:/usr/local/netcdf-c-4.10.0/lib:/usr/local/netcdf-fortran/lib:/usr/local/cdf-3.9.1/lib:$LD_LIBRARY_PATH +``` + +## Build System Options + +### Autotools (Primary) + +Working directory: `/home/ed/NEP` + +**Common configure flags:** + +- `--enable-geotiff` - Enable GeoTIFF reader +- `--enable-cdf` - Enable NASA CDF reader +- `--disable-lz4` - Disable LZ4 compression +- `--disable-bzip2` - Disable bzip2 compression +- `--disable-fortran` - Disable Fortran wrapper library +- `--disable-shared` - Build static libraries only + +**Full build command:** + +```bash +autoreconf -i && \ +CFLAGS="-g -O0" \ +CPPFLAGS="-I/usr/local/hdf5-2.0.0/include -I/usr/local/netcdf-c-4.10.0/include -I/usr/local/netcdf-fortran/include -I/usr/local/cdf-3.9.1/include" \ +LDFLAGS="-L/usr/local/hdf5-2.0.0/lib -L/usr/local/netcdf-c-4.10.0/lib -L/usr/local/netcdf-fortran/lib -L/usr/local/cdf-3.9.1/lib -Wl,-rpath,/usr/local/hdf5-2.0.0/lib -Wl,-rpath,/usr/local/netcdf-c-4.10.0/lib -Wl,-rpath,/usr/local/netcdf-fortran/lib" \ +./configure --enable-geotiff --enable-cdf --disable-fortran 
--disable-shared --disable-bzip2 --disable-lz4 && \ +make clean && make -j$(nproc) && make check +``` + +### CMake (Alternative) + +**IMPORTANT**: All CMake builds must use the `build` directory, which is git-ignored. + +Working directory: `/home/ed/NEP` + +```bash +mkdir -p build && cd build +cmake .. \ + -DCMAKE_PREFIX_PATH="/usr/local/hdf5-1.14.6_cmake;/usr/local/netcdf-c-4.9.3_cmake;/usr/local/cdf-3.9.1" \ + -DCMAKE_BUILD_TYPE=Debug \ + -DENABLE_GEOTIFF=ON \ + -DENABLE_CDF=ON \ + -DENABLE_FORTRAN=OFF +make -j$(nproc) && ctest +``` + +**Never create CMake build artifacts outside the `build` directory** to avoid cluttering the repository with untracked files. + +## Troubleshooting + +- **"library not found" errors**: Check `LD_LIBRARY_PATH` is set +- **"header not found" errors**: Verify `CPPFLAGS` includes correct paths +- **Link errors**: Ensure `LDFLAGS` includes all dependency lib directories +- **Test failures**: Run `make check VERBOSE=1` for detailed output diff --git a/windsurf-harnett/rules/slow-network.md b/windsurf-harnett/rules/slow-network.md new file mode 100644 index 000000000..752ccfcca --- /dev/null +++ b/windsurf-harnett/rules/slow-network.md @@ -0,0 +1,55 @@ +--- +trigger: always_on +--- + +# Network Resilience Guidelines + +The network here is slow and flakey. 
Follow these specific retry strategies: + +## HTTP Requests (read_url_content, search_web) + +- **Retry count**: 3 attempts +- **Timeout**: 10 seconds per attempt +- **Backoff**: 5 seconds between retries +- **Pre-check**: Use `curl -I` for connectivity before full requests + +## MCP Server Calls + +- **Retry count**: 5 attempts +- **Delay**: 2 seconds wait between attempts +- **Timeout**: 30 seconds per call +- **Pre-check**: Verify MCP server process is running + +## Git Operations + +- **Retry count**: Immediate retry once +- **Issue**: Distinguish network vs authentication failures +- **Auth failures**: Don't retry - check credentials + +## Diagnostic Commands + +- **Basic connectivity**: `ping -c 1 8.8.8.8` +- **HTTP test**: `curl -I https://github.com` +- **MCP status**: Check process list for MCP server + +## Failure Type Handling + +- **Timeouts**: Retry with exponential backoff +- **Connection refused**: Check if service is running +- **Authentication errors**: Don't retry - fix credentials first +- **DNS failures**: Check `/etc/resolv.conf` and retry + +## Tool-Specific Guidance + +- **bash network commands**: Always test connectivity first +- **file operations**: Local only - no retries needed +- **build commands**: Network-dependent parts need retry logic + +## GitHub Interactions + +- **Prefer GitHub CLI**: Use `gh` command line tool instead of MCP GitHub tools +- **MCP GitHub tools**: Unreliable due to TLS handshake timeouts on slow network +- **Issue creation**: Use `gh issue create --title "..." --body "..."` or `gh issue create --body-file <file>` +- **PR operations**: Use `gh pr create`, `gh pr view`, `gh pr comment`, etc. 
+- **Authentication**: Ensure `gh auth status` shows valid credentials before operations +- **Fallback**: If `gh` unavailable, document the action needed and ask user to perform manually diff --git a/windsurf-harnett/skills/linkedin-posts.md b/windsurf-harnett/skills/linkedin-posts.md new file mode 100644 index 000000000..1a4889a31 --- /dev/null +++ b/windsurf-harnett/skills/linkedin-posts.md @@ -0,0 +1,275 @@ +--- +description: Best practices for creating engaging and effective LinkedIn posts +--- + +# LinkedIn Post Creation Skills + +## My Audience Profile + +**Target Audience:** + +- Engineers (software, systems, data) +- Earth scientists (geoscientists, climate scientists, environmental scientists) +- AI/ML engineers +- Scientific researchers and technical professionals + +**Content Approach:** + +- Focus on the work and its benefits, not self-promotion +- Scientific community values substance over personal branding +- Emphasize technical contributions, research findings, and practical applications +- Highlight how the work advances the field or solves real problems +- Share methodologies, insights, and lessons learned +- Maintain professional, research-oriented tone +- Let the work speak for itself + +## Content Strategy + +### Know Your Audience + +- Tailor content to resonate with your professional network +- Understand demographic characteristics (age, gender, location, education, income) +- Understand psychographic characteristics (personality, values, interests, lifestyle, motivations) +- Address topics relevant to your industry and audience's pain points + +### Content Types That Perform Well + +- **Personal stories** of triumph and professional challenges +- **Educational content** - how-to guides, tips, industry insights +- **Case studies and research** - builds credibility and trust +- **Company updates** - focus on value to audience +- **List-style posts** - get more likes and comments +- **Behind-the-scenes** content - authentic look at your work +- 
**Industry news and trends** - positions you as thought leader + +## Writing Structure + +### Craft a Compelling Hook (First 2 Seconds Count) + +- Start with attention-grabbing first sentence +- Use statistics, quotes, questions, or personal anecdotes +- Create intrigue to make readers click "See more" +- Best headline lengths: 40-49 characters +- Spend 50% of your time on the headline alone + +### Hook Techniques That Work + +- Little-known facts +- Behind-the-scenes insights +- Catchy quotes +- Extraordinary insights +- Compelling statements +- Statistics +- Humor +- How-to offerings + +### Format for Readability + +- **One post = one thesis** - stay focused +- Break walls of text into single-sentence paragraphs +- Use 3-4 hard paragraph breaks after headline to create intrigue +- Keep sentences short and conversational (14-year-old reading level) +- First 3 lines visible before "See more" - make them count +- Use emojis strategically to break up text and add personality +- Use bullet points or numbered lists for tips +- Leave white space between sections + +### Length Guidelines + +- Keep it concise but impactful - every sentence must add value +- Longer posts (up to 15 lines) are acceptable on LinkedIn +- Only first 3 lines visible initially - use as teaser +- Reading online is 25% slower than print - be mindful + +## Engagement Tactics + +### Call-to-Action (CTA) + +- Always include a clear CTA +- Tell audience exactly what to do next +- Options: comment, like, share, click link, answer question +- Specific instructions outperform vague endings +- Posts with engagement get prioritized by LinkedIn algorithm + +### Ask Questions + +- End posts with questions to encourage commenting +- LinkedIn rewards posts with comments +- More comments = higher chance of trending +- Trending posts reach 2nd and 3rd-degree connections +- Question ideas: productivity tools, favorite quotes, career advice, industry tips + +### Tag and Mention + +- @mention relevant connections or 
influencers +- Tag people who contributed to your story +- When they engage, post reaches their network +- Build relationships and give shoutouts +- Increases visibility exponentially + +## Visual Elements + +### When to Use Visuals + +- Text-only posts often perform best on LinkedIn +- Add visuals when they enhance understanding +- Use charts/diagrams for data, trends, case studies +- Images of people work well (human psychology) +- Infographics for complex information +- Professional-looking visuals only + +### Video Content + +- Videos are 20 times more shareable than other formats +- Keep videos short and professional +- Optimize for mobile viewing +- Video types that work: + - Brand/business origin stories + - How-to demonstrations + - Event previews and releases + - Expert interviews + - Educational lectures and talks + +## Hashtag Strategy + +### Best Practices + +- Use 3-5 relevant hashtags maximum +- Place hashtags at the end of post +- Mix niche and well-known hashtags +- Create one branded hashtag for consistency +- Vary hashtags - don't repeat same ones (algorithm penalty) +- Research hashtags relevant to your topics + +### Popular Hashtags + +- #entrepreneurship, #startups, #smallbusiness +- #marketing, #digitalmarketing, #branding +- #productivity, #motivation, #strategy +- #innovation, #hiringalert +- Industry-specific hashtags + +## Critical Don'ts + +### External Links + +- **Never include external links in the post body** +- LinkedIn punishes posts with external links (low engagement) +- Instead: mention link is in comments section +- Ask connections to like the comment to keep it on top +- LinkedIn wants users to stay on platform + +### Other Mistakes to Avoid + +- Don't use hashtags in headlines +- Don't overuse emojis (few strategic ones only) +- Don't bore your audience +- Don't use professional jargon - keep conversational +- Don't post without proofreading + +## Content Principles + +### Provide Value + +- Share actionable insights, tips, 
strategies +- Offer industry-specific knowledge +- Provide time-saving hacks +- Address common challenges +- Position yourself as valuable resource + +### Be Authentic + +- Share genuine experiences - successes and failures +- Be relatable and human +- Show personality (don't be afraid to be silly) +- Behind-the-scenes content works well +- Your unique experiences set you apart +- Human-to-human marketing resonates + +### Tell Stories + +- Every story needs: problem, solution, moral +- Stories trigger emotions and sell +- Create sense of kinship and relatability +- Reflect human values +- Make stories unexpected yet relevant +- "Infotainment" - inform and entertain + +### Stay Timely and Relevant + +- Discuss current events impacting your field +- Share insights on new technologies +- Comment on emerging trends +- Keep audience informed +- Establish yourself as thought leader + +## Posting Strategy + +### Timing and Consistency + +- Post at the same time every day +- Consistency helps connections know when to expect content +- Schedule posts for maximum visibility +- Best times vary by audience - test and track + +### Experiment and Adapt + +- Try different formats (lists, quotes, infographics, polls) +- Test what resonates with your audience +- Track which formats get most interaction +- Adapt future posts based on performance +- Don't delete underperforming posts - learn from them + +### Pre-Publish Checklist + +- Review for grammatical errors +- Verify content relevance to professional goals +- Confirm message aligns with intended tone +- Check that it serves your audience +- Ensure authenticity +- Proofread thoroughly + +## Advanced Tactics + +### Offer Intellectual Property (IP) + +- Share valuable resources with your community +- Examples: checklists, templates, how-to guides, scripts +- Based on your professional experience +- Positions you as expert +- Builds trust and authority + +### Use LinkedIn Features + +- Polls for engagement +- Native video uploads 
(not external links) +- Document uploads for valuable resources +- LinkedIn articles for longer content + +### Algorithm Optimization + +- Posts with good engagement get prioritized +- Comments matter more than likes +- Early engagement (first hour) is critical +- Avoid external links (algorithm penalty) +- Avoid repeating same hashtags (algorithm penalty) +- Plain text posts often outperform media posts + +## Key Metrics to Track + +- Views and impressions +- Comments (most valuable) +- Likes and reactions +- Shares +- Click-through rates +- Profile visits from posts +- Connection requests from posts + +## Remember + +- LinkedIn users come to grow professionally, not kill time +- Focus on educational, informative, relevant content +- Build genuine relationships through two-way conversations +- Consistency beats perfection +- Authenticity wins over polish +- Value to audience is paramount diff --git a/windsurf-harnett/skills/netcdf-architecture/README.md b/windsurf-harnett/skills/netcdf-architecture/README.md new file mode 100644 index 000000000..716f6c965 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/README.md @@ -0,0 +1,61 @@ +# NetCDF-C Architecture Skill + +This Windsurf skill provides comprehensive knowledge of the NetCDF-C library architecture. 
+ +## What This Skill Provides + +- **Dispatch table architecture** understanding +- **Format implementations** (NetCDF-3, HDF5, Zarr, DAP2, DAP4) +- **Data structures** used throughout the codebase +- **I/O layers** and storage backends +- **Component relationships** and data flow + +## Files + +- **SKILL.md** - Main skill file with architecture overview and quick reference +- **references/COMPONENTS.md** - Detailed component descriptions for each library +- **references/DATA-STRUCTURES.md** - Complete data structure documentation +- **references/DISPATCH-TABLES.md** - All dispatch table implementations + +## When to Use + +Use this skill when: + +- Adding new features to NetCDF-C +- Debugging format-specific issues +- Understanding data flow through the library +- Implementing new dispatch tables or storage backends +- Working with metadata structures +- Investigating performance issues + +## Skill Compliance + +This skill follows the Windsurf Agent Skills specification: + +- ✅ Valid name: `netcdf-architecture` (lowercase, hyphens only) +- ✅ Description: Comprehensive, includes keywords and use cases +- ✅ SKILL.md: 529 lines (near the 500-line recommendation; detailed content is split into references/) +- ✅ Progressive disclosure: Main content in SKILL.md, details in references/ +- ✅ Reference files: One level deep, focused topics +- ✅ Metadata: Author, version, date included + +## Installation + +To use this skill in Windsurf: + +1. Copy the `netcdf-architecture/` directory to your Windsurf skills location +2. The skill will be automatically detected and loaded +3. 
AI assistants can now access NetCDF-C architecture knowledge + +## Maintenance + +Update this skill when: + +- New dispatch tables are added +- Major architectural changes occur +- New storage backends are implemented +- Data structures are significantly modified + +## Version + +Current version: 1.0 (January 14, 2026) diff --git a/windsurf-harnett/skills/netcdf-architecture/SKILL.md b/windsurf-harnett/skills/netcdf-architecture/SKILL.md new file mode 100644 index 000000000..28dd270f7 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/SKILL.md @@ -0,0 +1,529 @@ +--- +name: netcdf-architecture +description: Understanding the NetCDF-C library architecture including dispatch tables, format implementations (NetCDF-3, HDF5, Zarr, DAP), I/O layers, and metadata structures. Use when working on NetCDF-C codebase, debugging format issues, adding new features, or understanding how different storage backends interact. +metadata: + author: netcdf-analysis + version: '1.0' + date: '2026-01-14' +--- + +# NetCDF-C Architecture Skill + +This skill provides comprehensive knowledge of the NetCDF-C library architecture to help you navigate, understand, and modify the codebase effectively. + +## Overview + +NetCDF-C is a multi-format I/O library built on a **dispatch table architecture** that provides a unified API across 7+ built-in storage formats plus 10 user-defined format (UDF) slots. The core design pattern uses function pointer tables to route operations to format-specific implementations. 
+ +**Built-in formats**: NetCDF-3 (CDF-1/2/5), NetCDF-4/HDF5, Zarr, DAP2, DAP4 +**User-defined formats**: UDF0-UDF9 slots for custom format plugins + +## Core Architecture Pattern + +### Dispatch Table Design + +Every file format implements the same `NC_Dispatch` interface containing ~70 function pointers: + +```c +struct NC_Dispatch { + int model; // Format identifier + int dispatch_version; // Compatibility version + + // File operations + int (*create)(...); + int (*open)(...); + int (*close)(...); + + // Variable I/O + int (*get_vara)(...); + int (*put_vara)(...); + + // Metadata operations + int (*def_dim)(...); + int (*def_var)(...); + int (*put_att)(...); + + // ... ~60 more function pointers +}; +``` + +**Location**: `include/netcdf_dispatch.h` + +### Common File Handle (NC Structure) + +Every open file is represented by an `NC` struct: + +```c +typedef struct NC { + int ext_ncid; // External ID (user-visible) + int int_ncid; // Internal ID (format-specific) + const NC_Dispatch* dispatch; // Function pointer table + void* dispatchdata; // Format-specific metadata + char* path; // File path + int mode; // Open mode flags +} NC; +``` + +**Location**: `include/nc.h` + +## Directory Structure + +### Primary Libraries + +- **`libdispatch/`** - Central routing layer, API entry points, utilities, UDF plugin loading +- **`libsrc/`** - Classic NetCDF-3 implementation (CDF-1, CDF-2, CDF-5) +- **`libsrc4/`** - NetCDF-4 enhanced model coordination +- **`libhdf5/`** - HDF5 storage backend +- **`libnczarr/`** - Zarr cloud-native storage +- **`libdap2/`** + **`oc2/`** - OPeNDAP DAP2 client +- **`libdap4/`** - OPeNDAP DAP4 client +- **`libhdf4/`** - HDF4 file access (optional) +- **User plugins** - External shared libraries for UDF0-UDF9 slots + +### Support Libraries + +- **`include/`** - Public API headers and internal interfaces +- **`libncpoco/`** - Portable components +- **`libncxml/`** - XML parsing for DAP4 +- **`liblib/`** - Additional utilities + +## Key 
Components by Library + +### libdispatch/ - The Routing Layer + +**Purpose**: Provides unified API facade and routes calls to appropriate format implementations. + +**Critical Files**: + +- `ddispatch.c` - Dispatch initialization, global state management +- `dfile.c` - File open/create orchestration, format detection +- `dvarget.c`, `dvarput.c` - Variable I/O entry points +- `dvar.c`, `datt.c`, `ddim.c` - Metadata operation entry points +- `dinfermodel.c` - Format detection (magic numbers, URLs) + +**Format Detection Logic**: + +1. Check magic number (first 8 bytes) - includes user-defined magic numbers +2. Parse URL scheme (http://, s3://, file://) +3. Analyze mode flags (NC_NETCDF4, NC_CLASSIC_MODEL, NC_UDF0-NC_UDF9, etc.) +4. Select appropriate dispatch table (built-in or user-defined) + +**Utilities**: + +- `ncjson.c` - JSON parsing +- `ncuri.c` - URI parsing +- `dauth.c` - Authentication (includes RC file parsing for UDF configuration) +- `dhttp.c` - HTTP operations +- `ds3util.c` - S3/cloud utilities +- `drc.c` - RC file parsing for UDF plugin configuration +- `dutil.c` - Plugin loading (dlopen/LoadLibrary) + +### libsrc/ - Classic NetCDF-3 + +**Purpose**: Implements traditional binary NetCDF formats. 
+ +**Dispatch Table**: `NC3_dispatcher` in `nc3dispatch.c` + +**Metadata Structure**: `NC3_INFO` - Simple arrays with hashmaps + +**Critical Files**: + +- `nc3dispatch.c` (517 lines) - Dispatch table implementation +- `nc3internal.c` - Metadata management +- `ncx.c` (743KB) - XDR-like encoding/decoding for all data types +- `putget.c` (353KB) - Variable I/O operations +- `attr.c` (47KB) - Attribute operations +- `var.c`, `dim.c` - Variable and dimension management + +**I/O Abstraction (ncio layer)**: + +- `posixio.c` - Standard POSIX file I/O +- `memio.c` - In-memory files +- `httpio.c` - HTTP byte-range access +- `s3io.c` - S3 object storage + +**Data Structures**: + +```c +typedef struct NC3_INFO { + NC_dimarray dims; // Dimensions + NC_attrarray attrs; // Global attributes + NC_vararray vars; // Variables + size_t xsz; // External size + size_t begin_var; // Offset to variables + size_t begin_rec; // Offset to record data + size_t recsize; // Record size + // ... more fields +} NC3_INFO; +``` + +### libsrc4/ - NetCDF-4 Coordination + +**Purpose**: Thin coordination layer for NetCDF-4 enhanced features (groups, user-defined types). + +**Note**: This is NOT a complete implementation - it delegates to HDF5 or Zarr backends. + +**Files**: + +- `nc4dispatch.c` - Minimal initialization +- `nc4attr.c`, `nc4dim.c`, `nc4var.c` - Enhanced metadata operations +- `nc4grp.c` - Group operations +- `nc4type.c` - User-defined type operations +- `nc4internal.c` - Common infrastructure + +### libhdf5/ - HDF5 Storage Backend + +**Purpose**: Implements NetCDF-4 using HDF5 as the storage format. 
+ +**Dispatch Table**: `HDF5_dispatcher` in `hdf5dispatch.c` + +**Metadata Structure**: `NC_FILE_INFO_T` with hierarchical groups + +**Critical Files**: + +- `hdf5dispatch.c` (152 lines) - Dispatch table +- `nc4hdf.c` (87KB) - Core HDF5 integration +- `hdf5open.c` (99KB) - File opening, metadata reading from HDF5 +- `hdf5var.c` (85KB) - Variable I/O with chunking, compression, filters +- `hdf5attr.c` (28KB) - Attribute operations +- `hdf5filter.c` - Filter/compression plugin management +- `H5FDhttp.c` - HTTP virtual file driver for byte-range access + +**Key Data Structures**: + +```c +typedef struct NC_FILE_INFO_T { + NC_GRP_INFO_T* root_grp; // Root group + int no_write; // Read-only flag + void* format_file_info; // HDF5-specific data + // ... more fields +} NC_FILE_INFO_T; + +typedef struct NC_VAR_INFO_T { + NC_OBJ hdr; // Name and ID + NC_GRP_INFO_T* container; // Parent group + size_t ndims; // Number of dimensions + int* dimids; // Dimension IDs + size_t* chunksizes; // Chunk sizes + int storage; // Chunked/contiguous/compact + int endianness; // Byte order + void* filters; // Compression filters + // ... more fields +} NC_VAR_INFO_T; +``` + +**Delegates to**: HDF5 library → HDF5 VFD layer → actual storage + +### libnczarr/ - Zarr Storage + +**Purpose**: Cloud-native storage using Zarr format specification. 
+ +**Dispatch Table**: `NCZ_dispatcher` in `zdispatch.c` + +**Metadata Structure**: `NC_FILE_INFO_T` (same as HDF5) + +**Critical Files**: + +- `zdispatch.c` (323 lines) - Dispatch table +- `zarr.c` - Main Zarr implementation +- `zsync.c` (84KB) - Data synchronization, chunk management +- `zvar.c` (76KB) - Variable operations +- `zfilter.c` - Codec pipeline (compression, filters) +- `zxcache.c` - Chunk caching + +**Storage Abstraction (zmap)**: + +- `zmap.c` - Abstract storage interface +- `zmap_file.c` - Filesystem backend +- `zmap_s3sdk.c` - AWS S3 backend +- `zmap_zip.c` - ZIP archive backend + +**Key Feature**: JSON metadata (.zarray, .zgroup, .zattrs files) + +### libdap2/ + oc2/ - OPeNDAP DAP2 Client + +**Purpose**: Access remote OPeNDAP servers using DAP2 protocol. + +**Dispatch Table**: `NCD2_dispatcher` in `ncd2dispatch.c` + +**Components**: + +- `ncd2dispatch.c` (85KB) - Dispatch implementation +- `getvara.c` (44KB) - Maps NetCDF API to DAP requests +- `constraints.c` - DAP constraint expression handling +- `cache.c` - Response caching + +**OC2 Library** (OPeNDAP Client in `oc2/`): + +- `oc.c` (62KB) - Main client implementation +- `dapparse.c`, `daplex.c` - DDS/DAS parsing +- `ocdata.c` - Data retrieval and decoding +- `occurlfunctions.c` - HTTP/libcurl integration + +### libdap4/ - OPeNDAP DAP4 Client + +**Purpose**: Access remote DAP4 servers (newer protocol). + +**Dispatch Table**: `NCD4_dispatcher` in `ncd4dispatch.c` + +**Critical Files**: + +- `ncd4dispatch.c` (24KB) - Dispatch table +- `d4parser.c` (49KB) - DMR (Dataset Metadata Response) parsing +- `d4data.c` - Binary data handling +- `d4chunk.c` - Chunked response processing +- `d4meta.c` (34KB) - Metadata translation to NetCDF model +- `d4curlfunctions.c` - HTTP operations + +### User-Defined Formats (UDFs) + +**Purpose**: Extensible plugin system for custom file formats and storage backends. 
+ +**Available Slots**: UDF0 through UDF9 (10 independent format slots) + +**Dispatch Tables**: Registered via `nc_def_user_format()` or RC file configuration + +**Key Features**: + +- **Plugin loading**: Automatic loading from RC files during `nc_initialize()` +- **Magic number detection**: Optional automatic format detection +- **Shared libraries**: .so (Unix) or .dll (Windows) plugins +- **Full API support**: Plugins implement complete `NC_Dispatch` interface + +**Plugin Architecture**: + +1. **Dispatch Table**: Plugin provides `NC_Dispatch` structure with function pointers +2. **Initialization Function**: Exported function called during plugin load +3. **Format-Specific Code**: Custom implementation of file I/O and data operations + +**Registration Methods**: + +**Programmatic Registration**: + +```c +// Register UDF in slot 0 with magic number +nc_def_user_format(NC_UDF0 | NC_NETCDF4, &my_dispatcher, "MYFORMAT"); + +// Query registered UDF +NC_Dispatch *disp; +nc_inq_user_format(NC_UDF0, &disp, magic_buffer); +``` + +**RC File Configuration** (`.ncrc`): + +```ini +NETCDF.UDF0.LIBRARY=/usr/local/lib/libmyformat.so +NETCDF.UDF0.INIT=myformat_init +NETCDF.UDF0.MAGIC=MYFORMAT +``` + +**Plugin Loading Process**: + +1. RC files parsed during `nc_initialize()` +2. Library loaded via `dlopen()` (Unix) or `LoadLibrary()` (Windows) +3. Init function located via `dlsym()` or `GetProcAddress()` +4. Init function calls `nc_def_user_format()` to register dispatch table +5. Dispatch table ABI version verified (`NC_DISPATCH_VERSION`) +6. Plugin remains loaded for process lifetime + +**RC File Search Order**: + +1. `$HOME/.ncrc` +2. `$HOME/.daprc` +3. `$HOME/.dodsrc` +4. `$CWD/.ncrc` +5. `$CWD/.daprc` +6. 
`$CWD/.dodsrc` + +**UDF Slot Modes**: + +- **UDF0, UDF1**: Original slots, mode flags in lower 16 bits +- **UDF2-UDF9**: Extended slots, mode flags in upper 16 bits + +**Pre-defined Dispatch Functions** (for plugin use): + +- `NC_RO_*` - Read-only stubs (return `NC_EPERM`) +- `NC_NOTNC4_*` - Not-NetCDF-4 stubs (return `NC_ENOTNC4`) +- `NC_NOTNC3_*` - Not-NetCDF-3 stubs (return `NC_ENOTNC3`) +- `NC_NOOP_*` - No-operation stubs (return `NC_NOERR`) +- `NCDEFAULT_*` - Generic implementations +- `NC4_*` - NetCDF-4 inquiry functions using internal metadata model + +**Critical Files**: + +- `libdispatch/dfile.c` - UDF dispatch table storage (`UDF0_dispatch_table`, etc.) +- `libdispatch/ddispatch.c` - `nc_def_user_format()`, `nc_inq_user_format()` +- `libdispatch/drc.c` - RC file parsing for UDF configuration +- `libdispatch/dutil.c` - Plugin library loading +- `include/netcdf_dispatch.h` - `NC_Dispatch` structure definition +- `libdispatch/dreadonly.c` - Pre-defined read-only stubs +- `libdispatch/dnotnc*.c` - Pre-defined not-supported stubs + +**Example Plugin Structure**: + +```c +#include "netcdf_dispatch.h" + +static NC_Dispatch my_dispatcher = { + NC_FORMATX_UDF0, // Use UDF slot 0 + NC_DISPATCH_VERSION, // Current ABI version + + NC_RO_create, // Read-only: use predefined function + my_open, // Custom open function + my_close, // Custom close function + NC4_inq, // Use NC4 inquiry defaults + // ... 
~70 function pointers total +}; + +// Initialization function - must be exported +int my_plugin_init(void) { + return nc_def_user_format(NC_UDF0 | NC_NETCDF4, + &my_dispatcher, + "MYFMT"); +} +``` + +**Security Considerations**: + +- RC files must specify absolute library paths +- Plugins execute arbitrary code in process space +- Only load trusted libraries +- Library verifies dispatch table ABI version + +**Common Use Cases**: + +- Proprietary or specialized file formats +- Custom storage backends +- Format translation layers +- Domain-specific data formats +- Integration with legacy systems + +## Common Patterns + +### 1. API Call Flow + +``` +User calls nc_get_vara(ncid, varid, start, count, data) + ↓ +libdispatch/dvarget.c + ↓ +Lookup NC* from ncid → get dispatch table + ↓ +dispatch->get_vara(...) + ↓ +Format-specific implementation: + • NC3_get_vara() → ncx.c XDR decode → ncio read + • NC4_get_vara() → HDF5 API → chunk cache → decompress + • NCZ_get_vara() → zmap retrieve → codec pipeline + • NCD2_get_vara() → HTTP request → parse DDS/DAS +``` + +### 2. File Opening + +``` +nc_open(path, mode, &ncid) + ↓ +libdispatch/dfile.c: NC_open() + ↓ +dinfermodel.c: Detect format + • Check magic number + • Parse URL scheme + • Analyze mode flags + ↓ +Select dispatch table + ↓ +dispatch->open(path, mode, ...) + ↓ +Format-specific open implementation + ↓ +Return ncid to user +``` + +### 3. 
Metadata Access + +All formats use indexed structures for fast lookup: + +- **NC3**: Arrays with `NC_hashmap` +- **NC4/HDF5/Zarr**: `NCindex` (hash-based index) + +## Important Headers + +### Public API + +- `netcdf.h` - Main public API +- `netcdf_par.h` - Parallel I/O extensions +- `netcdf_filter.h` - Filter API +- `netcdf_mem.h` - In-memory file API + +### Internal Interfaces + +- `ncdispatch.h` - Dispatch layer interfaces +- `netcdf_dispatch.h` - NC_Dispatch structure definition +- `nc.h` - NC structure and common functions +- `nc3internal.h` - NetCDF-3 internal structures +- `nc4internal.h` - NetCDF-4 internal structures +- `nc3dispatch.h`, `nc4dispatch.h`, `hdf5dispatch.h` - Format-specific dispatch headers + +## When to Use This Skill + +Use this skill when: + +- **Adding new features** to NetCDF-C +- **Debugging format-specific issues** (e.g., HDF5 vs Zarr differences) +- **Understanding data flow** through the library +- **Implementing new dispatch tables** or storage backends +- **Developing UDF plugins** for custom file formats +- **Modifying I/O operations** (chunking, compression, filters) +- **Working with metadata structures** (groups, types, dimensions) +- **Investigating performance issues** (caching, I/O patterns) +- **Integrating new protocols** (new remote access methods) +- **Extending NetCDF-C** with proprietary or domain-specific formats + +## Quick Reference + +### Find the Right File + +**For API entry points**: Look in `libdispatch/d*.c` +**For NetCDF-3 operations**: Look in `libsrc/` +**For HDF5 operations**: Look in `libhdf5/` +**For Zarr operations**: Look in `libnczarr/` +**For remote access**: Look in `libdap2/` or `libdap4/` +**For data encoding**: Look in `libsrc/ncx.c` +**For I/O backends**: Look in `libsrc/*io.c` or `libnczarr/zmap*.c` + +### Common Tasks + +**Adding a new API function**: + +1. Add to `include/netcdf.h` +2. Add entry point in `libdispatch/` +3. Add to `NC_Dispatch` structure +4. 
Implement in each format's dispatch table + +**Adding a new format**: + +1. Create new library directory +2. Implement `NC_Dispatch` table +3. Register in `libdispatch/ddispatch.c` +4. Add format detection logic + +**Debugging I/O issues**: + +1. Enable logging: `export NETCDF_LOG_LEVEL=5` +2. Check dispatch table selection +3. Trace through format-specific implementation +4. Check I/O layer (ncio, HDF5 VFD, zmap) + +## Additional Resources + +See [references/COMPONENTS.md](references/COMPONENTS.md) for detailed component descriptions. + +See [references/DATA-STRUCTURES.md](references/DATA-STRUCTURES.md) for complete data structure documentation. + +See [references/DISPATCH-TABLES.md](references/DISPATCH-TABLES.md) for all dispatch table implementations. + +See [references/UDF-PLUGINS.md](references/UDF-PLUGINS.md) for comprehensive UDF plugin development guide. + +See [references/EXAMPLES.md](references/EXAMPLES.md) for programming examples and common patterns. + +See [references/FORTRAN-INTERFACE.md](references/FORTRAN-INTERFACE.md) for NetCDF Fortran 90 API documentation and usage patterns. diff --git a/windsurf-harnett/skills/netcdf-architecture/references/COMPONENTS.md b/windsurf-harnett/skills/netcdf-architecture/references/COMPONENTS.md new file mode 100644 index 000000000..b7ed6dd43 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/COMPONENTS.md @@ -0,0 +1,522 @@ +# NetCDF-C Component Details + +This reference provides detailed information about each major component in the NetCDF-C library. 
+ +## libdispatch/ - Central Routing Layer + +### Core Files + +**ddispatch.c** (477 lines) + +- Global state management (`NCglobalstate`) +- Dispatch initialization (`NCDISPATCH_initialize()`, `NCDISPATCH_finalize()`) +- Atomic type utilities +- Alignment configuration +- Manages: temp directories, home directory, RC files, chunk cache defaults + +**dfile.c** (2225 lines) + +- File open/create orchestration +- User-defined format registration (`nc_def_user_format()`) +- Format detection coordination +- NC structure lifecycle management + +**dinfermodel.c** (46KB) + +- Format detection from magic numbers +- URL scheme parsing (http://, https://, s3://, file://) +- Mode flag analysis +- Dispatch table selection logic + +**dvarget.c / dvarput.c** + +- Variable I/O entry points +- Type conversion coordination +- Stride/index calculation +- Delegates to format-specific `get_vara()` / `put_vara()` + +**dvar.c, datt.c, ddim.c, dgroup.c, dtype.c** + +- Metadata operation entry points +- Validation and error checking +- Delegation to format-specific implementations + +### Utility Files + +**ncjson.c** (35KB) - JSON parsing for Zarr metadata +**ncuri.c** (35KB) - URI parsing and manipulation +**ncbytes.c** - Dynamic byte buffer management +**dauth.c** (12KB) - Authentication (AWS, bearer tokens) +**dhttp.c** (25KB) - HTTP operations via libcurl +**ds3util.c** (32KB) - S3/cloud storage utilities +**nclist.c** - Dynamic list data structure +**nchashmap.c** (149KB) - Hash map implementation +**ncindex.c** - Index structure for fast metadata lookup + +## libsrc/ - Classic NetCDF-3 + +### Format Support + +- CDF-1: Classic format (32-bit offsets) +- CDF-2: 64-bit offset format +- CDF-5: 64-bit data format (large variables) + +### Core Implementation + +**nc3dispatch.c** (517 lines) + +- `NC3_dispatcher` table with ~70 function pointers +- Implements all required dispatch operations +- Returns `NC_ENOTNC4` for NetCDF-4-only features +- Stubs for groups, user-defined types, 
compression + +**nc3internal.c** (1784 lines) + +- `NC3_INFO` structure management +- Metadata lifecycle (create, duplicate, free) +- Type checking (`nc3_cktype()`) +- Format version handling + +**ncx.c** (743KB - generated from ncx.m4) + +- External Data Representation (XDR-like) +- Encoding/decoding for all atomic types +- Byte swapping for endianness +- Padding and alignment +- Platform-specific optimizations + +**putget.c** (353KB - generated from putget.m4) + +- Variable data I/O operations +- Type conversion matrix (all type combinations) +- Strided access implementation +- Record variable handling +- Fill value management + +**attr.c** (47KB - generated from attr.m4) + +- Attribute CRUD operations +- Attribute type conversion +- Global vs variable attributes +- Attribute renaming and deletion + +**var.c** (18KB) + +- Variable definition and inquiry +- Variable renaming +- Coordinate variable detection +- Record variable management + +**dim.c** (10KB) + +- Dimension definition and inquiry +- Unlimited dimension handling +- Dimension renaming + +### I/O Layer (ncio abstraction) + +**ncio.h / ncio.c** + +- Abstract I/O interface +- Strategy pattern for different backends +- Buffer management + +**posixio.c** (45KB) + +- POSIX file I/O (open, read, write, seek) +- Memory-mapped I/O option +- File locking +- Platform-specific optimizations + +**memio.c** (20KB) + +- In-memory file implementation +- Dynamic buffer growth +- Extract to external memory + +**httpio.c** (8KB) + +- HTTP byte-range requests +- Read-only access +- Caching strategy + +**s3io.c** (8KB) + +- S3 object storage access +- Byte-range requests +- AWS SDK integration + +### Data Structures + +```c +typedef struct NC3_INFO { + size_t chunk; // Chunk size hint + size_t xsz; // External file size + size_t begin_var; // Offset to non-record variables + size_t begin_rec; // Offset to record variables + size_t recsize; // Size of one record + size_t numrecs; // Number of records + NC_dimarray dims; // 
Dimensions + NC_attrarray attrs; // Global attributes + NC_vararray vars; // Variables + ncio* nciop; // I/O provider + int flags; // File flags + // ... more fields +} NC3_INFO; + +typedef struct NC_var { + NC_string* name; // Variable name + size_t ndims; // Number of dimensions + int* dimids; // Dimension IDs + NC_attrarray attrs; // Variable attributes + nc_type type; // Data type + size_t len; // Product of dimension sizes + size_t begin; // Offset in file + // ... more fields +} NC_var; +``` + +## libhdf5/ - HDF5 Storage Backend + +### Core Files + +**hdf5dispatch.c** (152 lines) + +- `HDF5_dispatcher` table +- Initialization/finalization +- HTTP VFD registration + +**nc4hdf.c** (87KB) + +- Core HDF5 integration +- File creation with HDF5 API +- Metadata synchronization +- Group/dataset creation +- Attribute handling + +**hdf5open.c** (99KB) + +- File opening logic +- HDF5 metadata reading +- Dimension scale detection +- Coordinate variable identification +- User-defined type reconstruction + +**hdf5var.c** (85KB) + +- Variable I/O operations +- Chunking coordination with HDF5 +- Filter pipeline management +- Type conversion +- Parallel I/O support + +**hdf5attr.c** (28KB) + +- Attribute operations via HDF5 +- Reserved attribute handling (\_NCProperties, etc.) +- Type conversion for attributes + +**hdf5filter.c** (16KB) + +- Filter plugin management +- HDF5 filter pipeline integration +- Compression (deflate, szip, etc.) 
+- Custom filter registration + +**hdf5dim.c, hdf5grp.c, hdf5type.c** + +- Dimension, group, and type operations +- HDF5 dimension scales +- Group hierarchy management +- User-defined type translation + +**H5FDhttp.c** (28KB) + +- HTTP Virtual File Driver +- Byte-range request support +- Read-only remote access +- Caching strategy + +### Key Features + +- Uses HDF5 dimension scales for dimensions +- Stores NetCDF metadata in HDF5 attributes +- Supports chunking, compression, filters +- Parallel I/O via HDF5 parallel features +- Backward compatible with pure HDF5 files (with limitations) + +## libnczarr/ - Zarr Storage + +### Core Files + +**zdispatch.c** (323 lines) + +- `NCZ_dispatcher` table +- Many operations delegate to libsrc4 +- Zarr-specific implementations for I/O and metadata + +**zarr.c** (8KB) + +- Main Zarr format implementation +- Format version handling +- Metadata JSON generation + +**zsync.c** (84KB) + +- Data synchronization between memory and storage +- Chunk reading/writing +- Metadata persistence (.zarray, .zgroup, .zattrs) +- Cache management + +**zvar.c** (76KB) + +- Variable operations +- Chunk coordinate calculation +- Data assembly from chunks +- Fill value handling + +**zfilter.c** (37KB) + +- Codec pipeline implementation +- Compression (blosc, zlib, etc.) 
+- Filter chaining +- Plugin support + +**zxcache.c** (27KB) + +- Chunk cache implementation +- LRU eviction +- Dirty chunk tracking +- Write-back strategy + +### Storage Abstraction (zmap) + +**zmap.c** (11KB) + +- Abstract storage interface +- Key-value semantics +- Backend selection + +**zmap_file.c** (31KB) + +- Filesystem backend +- Directory structure (.zarray, .zgroup files) +- Atomic writes + +**zmap_s3sdk.c** (15KB) + +- AWS S3 backend +- Object storage operations +- Credential management + +**zmap_zip.c** (22KB) + +- ZIP archive backend +- Read-only access +- Efficient random access + +### Zarr Format Details + +- JSON metadata (.zarray, .zgroup, .zattrs) +- Chunked storage (one file per chunk) +- Codec pipeline (compression, filters) +- Dimension separator (. or /) +- V2 and V3 format support (partial) + +## libdap2/ + oc2/ - DAP2 Client + +### libdap2/ Files + +**ncd2dispatch.c** (85KB) + +- `NCD2_dispatcher` table +- Complete dispatch implementation +- Constraint handling integration + +**getvara.c** (44KB) + +- Maps NetCDF API to DAP requests +- Constraint expression generation +- Subsetting and striding +- Type conversion + +**constraints.c** (25KB) + +- DAP constraint expression parsing +- Projection and selection +- Optimization + +**cache.c** (13KB) + +- HTTP response caching +- Cache invalidation +- Disk-based cache + +### oc2/ - OPeNDAP Client Library + +**oc.c** (62KB) + +- Main client implementation +- Connection management +- Request/response handling +- Error handling + +**dapparse.c / daplex.c** + +- DDS (Dataset Descriptor Structure) parsing +- DAS (Dataset Attribute Structure) parsing +- Lexical analysis + +**ocdata.c** (10KB) + +- Binary data decoding +- XDR stream processing +- Data assembly + +**occurlfunctions.c** (9KB) + +- libcurl integration +- HTTP request building +- Authentication +- SSL/TLS support + +### DAP2 Protocol + +- DDS: Describes data structure +- DAS: Describes attributes +- DODS: Binary data response +- Constraint 
expressions for subsetting + +## libdap4/ - DAP4 Client + +### Core Files + +**ncd4dispatch.c** (24KB) + +- `NCD4_dispatcher` table +- DAP4-specific operations + +**d4parser.c** (49KB) + +- DMR (Dataset Metadata Response) parsing +- XML-based metadata +- Namespace handling + +**d4data.c** (14KB) + +- Binary data handling +- Checksum verification +- Data assembly + +**d4chunk.c** (6KB) + +- Chunked response processing +- Streaming data support + +**d4meta.c** (34KB) + +- Metadata translation to NetCDF model +- Group hierarchy construction +- Type mapping + +**d4curlfunctions.c** (15KB) + +- HTTP operations +- Authentication +- Error handling + +### DAP4 Protocol + +- DMR: XML metadata response +- Binary data with checksums +- Chunked transfer encoding +- Enhanced type system + +## Support Libraries + +### libncpoco/ + +Portable components for cross-platform compatibility + +### libncxml/ + +XML parsing for DAP4 DMR responses + +### liblib/ + +Additional utility code and compatibility layers + +## Global State (NCglobalstate) + +Located in `libdispatch/ddispatch.c`: + +```c +typedef struct NCglobalstate { + char* tempdir; // Temporary directory + char* home; // Home directory + char* cwd; // Current working directory + + struct NCRCinfo* rcinfo; // RC file configuration + + struct { + size_t size; // Chunk cache size + size_t nelems; // Number of elements + float preemption; // Preemption policy + } chunkcache; + + struct { + int threshold; // Alignment threshold + int alignment; // Alignment value + int defined; // Whether set + } alignment; + + struct { + char* default_region; // AWS region + char* config_file; // AWS config file + char* profile; // AWS profile + char* access_key_id; // Access key + char* secret_access_key; // Secret key + } aws; + + NClist* pluginpaths; // Filter plugin paths +} NCglobalstate; +``` + +## Filter/Codec System + +### HDF5 Filters + +- Standard: deflate, shuffle, fletcher32, szip +- Plugin system for custom filters +- Filter IDs 
registered with HDF Group +- Parameters passed as unsigned int arrays + +### Zarr Codecs + +- JSON-based codec configuration +- Codec chain (multiple codecs) +- Standard: blosc, zlib, gzip, lz4, zstd +- Extensible through plugins + +## Parallel I/O Support + +### HDF5 Parallel + +- Uses MPI-IO via HDF5 +- Collective and independent I/O +- Requires parallel HDF5 build + +### PnetCDF (libsrcp/) + +- Experimental parallel I/O for NetCDF-3 +- MPI-IO based +- Separate dispatch table + +## Testing Structure + +- `nc_test/` - NetCDF-3 tests +- `nc_test4/` - NetCDF-4/HDF5 tests +- `nczarr_test/` - Zarr tests +- `ncdap_test/` - DAP2 tests +- `unit_test/` - Unit tests +- `h5_test/` - HDF5 interoperability tests diff --git a/windsurf-harnett/skills/netcdf-architecture/references/DATA-STRUCTURES.md b/windsurf-harnett/skills/netcdf-architecture/references/DATA-STRUCTURES.md new file mode 100644 index 000000000..59aceef90 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/DATA-STRUCTURES.md @@ -0,0 +1,484 @@ +# NetCDF-C Data Structures Reference + +This reference documents the key data structures used throughout NetCDF-C. + +## Common Structures + +### NC - File Handle (nc.h) + +```c +typedef struct NC { + int ext_ncid; // External ID (user-visible) + int int_ncid; // Internal ID (format-specific) + const struct NC_Dispatch* dispatch; // Function pointer table + void* dispatchdata; // Format-specific metadata + char* path; // File path + int mode; // Open mode flags +} NC; +``` + +**Location**: `include/nc.h:23-30` + +**Purpose**: Common handle for all open files, regardless of format. + +**Key Fields**: + +- `ext_ncid`: The ID returned to users, managed globally +- `int_ncid`: Format-specific ID (e.g., NetCDF-3 has its own ID space) +- `dispatch`: Points to format-specific function table +- `dispatchdata`: Points to `NC3_INFO`, `NC_FILE_INFO_T`, etc. 
+ +### NC_Dispatch - Function Pointer Table (netcdf_dispatch.h) + +```c +struct NC_Dispatch { + int model; // NC_FORMATX_NC3, NC_FORMATX_NC4, etc. + int dispatch_version; // Must match NC_DISPATCH_VERSION + + // File operations + int (*create)(const char *path, int cmode, ...); + int (*open)(const char *path, int mode, ...); + int (*redef)(int); + int (*_enddef)(int, size_t, size_t, size_t, size_t); + int (*sync)(int); + int (*abort)(int); + int (*close)(int, void *); + + // Metadata operations + int (*def_dim)(int, const char *, size_t, int *); + int (*def_var)(int, const char *, nc_type, int, const int *, int *); + int (*put_att)(int, int, const char *, nc_type, size_t, const void *, nc_type); + + // Variable I/O + int (*get_vara)(int, int, const size_t *, const size_t *, void *, nc_type); + int (*put_vara)(int, int, const size_t *, const size_t *, const void *, nc_type); + + // ... ~60 more function pointers +}; +``` + +**Location**: `include/netcdf_dispatch.h:34-256` + +**Implementations**: + +- `NC3_dispatcher` - NetCDF-3 (libsrc/nc3dispatch.c) +- `HDF5_dispatcher` - HDF5 (libhdf5/hdf5dispatch.c) +- `NCZ_dispatcher` - Zarr (libnczarr/zdispatch.c) +- `NCD2_dispatcher` - DAP2 (libdap2/ncd2dispatch.c) +- `NCD4_dispatcher` - DAP4 (libdap4/ncd4dispatch.c) + +## NetCDF-3 Structures (libsrc) + +### NC3_INFO - NetCDF-3 File Metadata + +```c +typedef struct NC3_INFO { + size_t chunk; // Chunk size hint for I/O + size_t xsz; // External file size + size_t begin_var; // Offset to non-record variables + size_t begin_rec; // Offset to record variables + size_t recsize; // Size of one record + size_t numrecs; // Number of records written + + NC_dimarray dims; // Dimensions + NC_attrarray attrs; // Global attributes + NC_vararray vars; // Variables + + ncio* nciop; // I/O provider + int flags; // File flags + int old_format; // CDF-1, CDF-2, or CDF-5 +} NC3_INFO; +``` + +**Location**: Defined across `include/nc3internal.h` + +### NC_dim - Dimension + +```c +typedef struct { 
+ NC_string* name; // Dimension name + size_t size; // Dimension length (NC_UNLIMITED for unlimited) +} NC_dim; +``` + +### NC_var - Variable + +```c +typedef struct NC_var { + NC_string* name; // Variable name + size_t ndims; // Number of dimensions + int* dimids; // Dimension IDs + NC_attrarray attrs; // Variable attributes + nc_type type; // Data type + size_t len; // Product of dimension sizes + size_t begin; // Offset in file + + // For record variables + size_t* shape; // Cached dimension sizes + size_t* dsizes; // Cached dimension products +} NC_var; +``` + +### NC_attr - Attribute + +```c +typedef struct NC_attr { + NC_string* name; // Attribute name + nc_type type; // Data type + size_t nelems; // Number of elements + void* xvalue; // Attribute value (external representation) +} NC_attr; +``` + +### NC_dimarray, NC_vararray, NC_attrarray + +```c +typedef struct NC_dimarray { + size_t nalloc; // Allocated size + size_t nelems; // Number of elements + NC_hashmap* hashmap; // For fast name lookup + NC_dim** value; // Array of dimension pointers +} NC_dimarray; + +// Similar for NC_vararray and NC_attrarray +``` + +**Key Feature**: Hash maps for O(1) name lookup + +## NetCDF-4 Structures (libsrc4, libhdf5, libnczarr) + +### NC_FILE_INFO_T - File Metadata + +```c +typedef struct NC_FILE_INFO_T { + NC_GRP_INFO_T* root_grp; // Root group + int no_write; // Read-only flag + int ignore_att_convention; // Ignore _NCProperties + void* format_file_info; // Format-specific data (HDF5/Zarr) + + // Provenance tracking + char* provenance; + int provenance_size; +} NC_FILE_INFO_T; +``` + +**Location**: `include/nc4internal.h` + +### NC_GRP_INFO_T - Group Metadata + +```c +typedef struct NC_GRP_INFO_T { + NC_OBJ hdr; // Name and ID + struct NC_FILE_INFO_T* nc4_info; // Parent file + struct NC_GRP_INFO_T* parent; // Parent group (NULL for root) + + NCindex* children; // Child groups + NCindex* dim; // Dimensions + NCindex* att; // Attributes + NCindex* type; // User-defined 
types + NCindex* vars; // Variables + + void* format_grp_info; // Format-specific data +} NC_GRP_INFO_T; +``` + +**Key Feature**: Hierarchical group structure with NCindex for fast lookup + +### NC_VAR_INFO_T - Variable Metadata + +```c +typedef struct NC_VAR_INFO_T { + NC_OBJ hdr; // Name and ID + char* alt_name; // Alternate name (for format differences) + struct NC_GRP_INFO_T* container; // Parent group + + size_t ndims; // Number of dimensions + int* dimids; // Dimension IDs + NC_DIM_INFO_T** dim; // Dimension pointers + + nc_bool_t is_new_var; // Newly created + nc_bool_t was_coord_var; // Was a coordinate variable + nc_bool_t became_coord_var; // Became a coordinate variable + nc_bool_t fill_val_changed; // Fill value changed + nc_bool_t attr_dirty; // Attributes need rewriting + nc_bool_t created; // Already created in file + nc_bool_t written_to; // Has data been written + + struct NC_TYPE_INFO* type_info; // Type information + int atts_read; // Attributes read flag + nc_bool_t meta_read; // Metadata read flag + nc_bool_t coords_read; // Coordinates read flag + + NCindex* att; // Attributes + + nc_bool_t no_fill; // No fill value + void* fill_value; // Fill value + + size_t* chunksizes; // Chunk sizes (if chunked) + int storage; // NC_CHUNKED, NC_CONTIGUOUS, NC_COMPACT + int endianness; // NC_ENDIAN_NATIVE, NC_ENDIAN_LITTLE, NC_ENDIAN_BIG + int parallel_access; // NC_COLLECTIVE or NC_INDEPENDENT + + struct ChunkCache { + size_t size; // Cache size in bytes + size_t nelems; // Number of cache slots + float preemption; // Preemption policy + } chunkcache; + + int quantize_mode; // Quantization mode + int nsd; // Number of significant digits + + void* format_var_info; // Format-specific data + void* filters; // Filter list +} NC_VAR_INFO_T; +``` + +**Location**: `include/nc4internal.h:166-201` + +### NC_DIM_INFO_T - Dimension Metadata + +```c +typedef struct NC_DIM_INFO_T { + NC_OBJ hdr; // Name and ID + struct NC_GRP_INFO_T* container; // Parent group + 
size_t len; // Dimension length + nc_bool_t unlimited; // Is unlimited + nc_bool_t extended; // Needs extension + nc_bool_t too_long; // Length too large for size_t + void* format_dim_info; // Format-specific data + struct NC_VAR_INFO* coord_var; // Coordinate variable +} NC_DIM_INFO_T; +``` + +### NC_ATT_INFO_T - Attribute Metadata + +```c +typedef struct NC_ATT_INFO_T { + NC_OBJ hdr; // Name and ID + struct NC_OBJ* container; // Parent group or variable + size_t len; // Number of elements + nc_bool_t dirty; // Modified flag + nc_bool_t created; // Already created + nc_type nc_typeid; // Data type + void* format_att_info; // Format-specific data + void* data; // Attribute value +} NC_ATT_INFO_T; +``` + +### NC_TYPE_INFO_T - User-Defined Type + +```c +typedef struct NC_TYPE_INFO_T { + NC_OBJ hdr; // Name and ID + struct NC_GRP_INFO_T* container; // Parent group + unsigned rc; // Reference count + int endianness; // Byte order + size_t size; // Size in bytes + nc_bool_t committed; // Committed to file + nc_type nc_type_class; // NC_VLEN, NC_COMPOUND, NC_OPAQUE, NC_ENUM + void* format_type_info; // Format-specific data + int varsized; // Variable-sized flag + + union { + struct { + NClist* enum_member; // Enum members + nc_type base_nc_typeid; // Base type + } e; + + struct { + NClist* field; // Compound fields + } c; + + struct { + nc_type base_nc_typeid; // Base type + } v; + } u; +} NC_TYPE_INFO_T; +``` + +### NC_FIELD_INFO_T - Compound Field + +```c +typedef struct NC_FIELD_INFO_T { + NC_OBJ hdr; // Name and ID + nc_type nc_typeid; // Field type + size_t offset; // Offset in compound + int ndims; // Number of dimensions + int* dim_size; // Dimension sizes + void* format_field_info; // Format-specific data +} NC_FIELD_INFO_T; +``` + +### NC_OBJ - Common Object Header + +```c +typedef struct NC_OBJ { + NC_SORT sort; // NCVAR, NCDIM, NCATT, NCTYP, NCGRP, NCFIL + char* name; // Object name + int id; // Object ID +} NC_OBJ; +``` + +**Purpose**: Common header for all 
indexed objects. All structures that go into NCindex must start with NC_OBJ. + +## Index Structures + +### NCindex - Fast Lookup Index + +```c +typedef struct NCindex { + size_t count; // Number of entries + size_t alloc; // Allocated size + void** content; // Array of NC_OBJ* pointers +} NCindex; +``` + +**Location**: `include/ncindex.h` + +**Operations**: O(1) by ID, O(n) by name (uses linear search) + +### NC_hashmap - Hash Map + +```c +typedef struct NC_hashmap { + size_t size; // Hash table size + size_t count; // Number of entries + struct NC_hentry** table; // Hash table +} NC_hashmap; +``` + +**Location**: `include/nchashmap.h` + +**Operations**: O(1) average case for name lookup + +## I/O Structures + +### ncio - I/O Provider (NetCDF-3) + +```c +typedef struct ncio { + const char* path; // File path + int ioflags; // I/O flags + off_t offset; // Current offset + size_t extent; // File extent + size_t nciop_size; // Provider-specific size + + // Function pointers + int (*rel)(ncio*, off_t, int); + int (*get)(ncio*, off_t, size_t, int, void**); + int (*move)(ncio*, off_t, off_t, size_t); + int (*sync)(ncio*); + int (*filesize)(ncio*, off_t*); + int (*pad_length)(ncio*, off_t); + int (*close)(ncio*, int); + + void* pvt; // Private data +} ncio; +``` + +**Location**: `libsrc/ncio.h` + +**Implementations**: posixio, memio, httpio, s3io + +## Global State + +### NCglobalstate - Global Configuration + +```c +typedef struct NCglobalstate { + char* tempdir; // Temporary directory + char* home; // Home directory + char* cwd; // Current working directory + + struct NCRCinfo* rcinfo; // RC file info + + struct { + size_t size; // Chunk cache size + size_t nelems; // Number of elements + float preemption; // Preemption policy + } chunkcache; + + struct { + int threshold; // Alignment threshold + int alignment; // Alignment value + int defined; // Set flag + } alignment; + + struct { + char* default_region; // AWS region + char* config_file; // Config file path + char* 
profile; // Profile name + char* access_key_id; // Access key + char* secret_access_key; // Secret key + } aws; + + NClist* pluginpaths; // Filter plugin paths +} NCglobalstate; +``` + +**Location**: `libdispatch/ddispatch.c` + +**Access**: `NC_getglobalstate()` + +## Utility Structures + +### NC_string - Counted String + +```c +typedef struct { + size_t nchars; // String length + char* cp; // String data +} NC_string; +``` + +### NClist - Dynamic List + +```c +typedef struct NClist { + size_t alloc; // Allocated size + size_t length; // Current length + void** content; // Array of pointers +} NClist; +``` + +### NCbytes - Dynamic Byte Buffer + +```c +typedef struct NCbytes { + size_t alloc; // Allocated size + size_t length; // Current length + char* content; // Buffer +} NCbytes; +``` + +## Format-Specific Structures + +### HDF5-Specific (NC_HDF5_FILE_INFO_T, etc.) + +Stored in `format_file_info`, `format_var_info`, etc. fields. + +Contains HDF5 handles (hid_t), property lists, and other HDF5-specific data. + +### Zarr-Specific (NCZ_FILE_INFO_T, etc.) + +Stored in `format_file_info`, `format_var_info`, etc. fields. + +Contains Zarr metadata (JSON), chunk cache, zmap handles, codec information. 
+ +## Memory Management + +**Allocation**: Most structures use `calloc()` for zero-initialization + +**Deallocation**: Each structure type has a corresponding `free_*()` function + +**Reference Counting**: User-defined types use reference counting (`NC_TYPE_INFO_T.rc`) + +**String Handling**: NC_string structures manage their own memory + +## Thread Safety + +**Global State**: Protected by internal locks (implementation-dependent) + +**File Handles**: Not thread-safe - one thread per file handle + +**Parallel I/O**: Uses MPI for coordination, not threading diff --git a/windsurf-harnett/skills/netcdf-architecture/references/DISPATCH-TABLES.md b/windsurf-harnett/skills/netcdf-architecture/references/DISPATCH-TABLES.md new file mode 100644 index 000000000..2f77cd627 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/DISPATCH-TABLES.md @@ -0,0 +1,757 @@ +# NetCDF-C Dispatch Tables Reference + +This reference documents all dispatch table implementations in NetCDF-C. + +## Dispatch Table Overview + +The `NC_Dispatch` structure contains function pointers for all NetCDF operations. Each format implements this interface. 
+ +**Definition**: `include/netcdf_dispatch.h:34-256` + +**Current Version**: `NC_DISPATCH_VERSION = 5` + +## NC3 Dispatch Table (NetCDF-3) + +**File**: `libsrc/nc3dispatch.c:81-174` + +**Table Name**: `NC3_dispatcher` + +**Model**: `NC_FORMATX_NC3` + +### Implementation Summary + +**Fully Implemented**: + +- File operations: create, open, close, sync, abort, redef, enddef +- Dimensions: def_dim, inq_dim, inq_dimid, inq_unlimdim, rename_dim +- Variables: def_var, inq_var, inq_varid, rename_var +- Attributes: put_att, get_att, inq_att, del_att, rename_att +- Variable I/O: get_vara, put_vara +- Inquiry: inq, inq_format, inq_type +- Fill values: set_fill, def_var_fill + +**Delegated to NCDEFAULT**: + +- get_vars, put_vars (strided access) +- get_varm, put_varm (mapped access) + +**Returns NC_ENOTNC4** (not supported): + +- Groups: def_grp, rename_grp, inq_grps, inq_grp_parent +- User-defined types: def_compound, def_vlen, def_enum, def_opaque +- Compression: def_var_deflate, def_var_fletcher32 +- Chunking: def_var_chunking, set_var_chunk_cache +- Filters: def_var_filter, inq_var_filter_ids +- Endianness: def_var_endian +- Quantization: def_var_quantize + +**Special Implementations**: + +- `inq_unlimdims`: Returns single unlimited dimension (NC3 has max 1) +- `inq_ncid`: Returns same ncid (no groups) +- `inq_grpname`: Returns "/" (root only) +- `inq_varids`, `inq_dimids`: Returns sequential IDs 0..n-1 + +### Key Functions + +```c +static const NC_Dispatch NC3_dispatcher = { + .model = NC_FORMATX_NC3, + .dispatch_version = NC_DISPATCH_VERSION, + + .create = NC3_create, + .open = NC3_open, + .redef = NC3_redef, + ._enddef = NC3__enddef, + .sync = NC3_sync, + .abort = NC3_abort, + .close = NC3_close, + + .get_vara = NC3_get_vara, + .put_vara = NC3_put_vara, + .get_vars = NCDEFAULT_get_vars, + .put_vars = NCDEFAULT_put_vars, + + // ... 
more function pointers +}; +``` + +## HDF5 Dispatch Table (NetCDF-4/HDF5) + +**File**: `libhdf5/hdf5dispatch.c:19-114` + +**Table Name**: `HDF5_dispatcher` + +**Model**: `NC_FORMATX_NC4` + +### Implementation Summary + +**Fully Implemented**: + +- All file operations +- All dimension operations (with HDF5 dimension scales) +- All variable operations (with chunking, compression, filters) +- All attribute operations (including reserved attributes) +- All group operations (hierarchical groups) +- All user-defined types (compound, vlen, enum, opaque) +- Compression and filters +- Chunking and endianness +- Parallel I/O (if HDF5 built with parallel support) +- Quantization (NetCDF-4.8+) + +**Delegated to NCDEFAULT**: + +- get_varm, put_varm (mapped access) + +**HDF5-Specific Features**: + +- Dimension scales for dimensions +- Reserved attributes (\_NCProperties, \_Netcdf4Coordinates, etc.) +- Filter plugins +- Chunk cache tuning +- Parallel I/O via MPI + +### Key Functions + +```c +static const NC_Dispatch HDF5_dispatcher = { + .model = NC_FORMATX_NC4, + .dispatch_version = NC_DISPATCH_VERSION, + + .create = NC4_create, + .open = NC4_open, + + .def_dim = HDF5_def_dim, + .inq_dim = HDF5_inq_dim, + .rename_dim = HDF5_rename_dim, + + .def_var = NC4_def_var, + .get_vara = NC4_get_vara, + .put_vara = NC4_put_vara, + .get_vars = NC4_get_vars, + .put_vars = NC4_put_vars, + + .def_var_deflate = NC4_def_var_deflate, + .def_var_chunking = NC4_def_var_chunking, + .def_var_filter = NC4_hdf5_def_var_filter, + + .def_grp = NC4_def_grp, + .def_compound = NC4_def_compound, + + // ... 
more function pointers +}; +``` + +## Zarr Dispatch Table (NCZarr) + +**File**: `libnczarr/zdispatch.c:19-111` + +**Table Name**: `NCZ_dispatcher` + +**Model**: `NC_FORMATX_NCZARR` + +### Implementation Summary + +**Fully Implemented**: + +- File operations (create, open, close, sync) +- Variable I/O (get_vara, put_vara, get_vars, put_vars) +- Zarr-specific metadata operations +- Codec/filter pipeline +- Chunk caching + +**Delegated to NC4 (libsrc4)**: + +- Most inquiry operations (inq_type, inq_dimid, inq_varid, etc.) +- Group operations (inq_grps, inq_grpname, etc.) +- Many metadata operations + +**Returns NC_ENOTNC4** (not supported): + +- User-defined types (compound, vlen, enum, opaque) +- Some type operations + +**Zarr-Specific Features**: + +- JSON metadata (.zarray, .zgroup, .zattrs) +- Multiple storage backends (file, S3, ZIP) +- Codec pipeline (blosc, zlib, etc.) +- Dimension separator (. or /) + +### Key Functions + +```c +static const NC_Dispatch NCZ_dispatcher = { + .model = NC_FORMATX_NCZARR, + .dispatch_version = NC_DISPATCH_VERSION, + + .create = NCZ_create, + .open = NCZ_open, + .close = NCZ_close, + .sync = NCZ_sync, + + .get_vara = NCZ_get_vara, + .put_vara = NCZ_put_vara, + .get_vars = NCZ_get_vars, + .put_vars = NCZ_put_vars, + + // Many operations delegate to NC4_* + .inq_type = NCZ_inq_type, // Calls NC4_inq_type + .inq_dimid = NCZ_inq_dimid, // Calls NC4_inq_dimid + + .def_var_filter = NCZ_def_var_filter, + .def_var_chunking = NCZ_def_var_chunking, + + // User-defined types not supported + .def_compound = NC_NOTNC4_def_compound, + .def_vlen = NC_NOTNC4_def_vlen, + + // ... 
more function pointers +}; +``` + +## DAP2 Dispatch Table (OPeNDAP) + +**File**: `libdap2/ncd2dispatch.c` + +**Table Name**: `NCD2_dispatcher` + +**Model**: `NC_FORMATX_DAP2` + +### Implementation Summary + +**Fully Implemented**: + +- File operations (open, close) +- Variable I/O with constraint expressions +- Metadata inquiry +- Attribute access +- Remote data access via HTTP + +**Not Supported** (read-only protocol): + +- create, redef, enddef +- def_dim, def_var, put_att +- put_vara, put_vars +- All NetCDF-4 features + +**DAP2-Specific Features**: + +- Constraint expressions for subsetting +- DDS/DAS parsing +- HTTP caching +- URL-based access + +### Key Functions + +```c +static const NC_Dispatch NCD2_dispatcher = { + .model = NC_FORMATX_DAP2, + .dispatch_version = NC_DISPATCH_VERSION, + + .create = NULL, // Not supported + .open = NCD2_open, + .close = NCD2_close, + + .get_vara = NCD2_get_vara, + .put_vara = NULL, // Read-only + + .inq = NCD2_inq, + .inq_var = NCD2_inq_var, + .get_att = NCD2_get_att, + + // ... 
more function pointers +}; +``` + +## DAP4 Dispatch Table (OPeNDAP) + +**File**: `libdap4/ncd4dispatch.c` + +**Table Name**: `NCD4_dispatcher` + +**Model**: `NC_FORMATX_DAP4` + +### Implementation Summary + +**Fully Implemented**: + +- File operations (open, close) +- Variable I/O +- Metadata inquiry (DMR parsing) +- Group support +- Enhanced type system + +**Not Supported** (read-only protocol): + +- create, redef, enddef +- def_dim, def_var, put_att +- put_vara, put_vars +- User-defined types (read-only) + +**DAP4-Specific Features**: + +- DMR (XML metadata) +- Groups and hierarchies +- Checksums +- Chunked transfer encoding + +### Key Functions + +```c +static const NC_Dispatch NCD4_dispatcher = { + .model = NC_FORMATX_DAP4, + .dispatch_version = NC_DISPATCH_VERSION, + + .create = NULL, // Not supported + .open = NCD4_open, + .close = NCD4_close, + + .get_vara = NCD4_get_vara, + .put_vara = NULL, // Read-only + + .inq_grps = NCD4_inq_grps, // Groups supported + + // ... more function pointers +}; +``` + +## User-Defined Format Tables + +**Files**: `libdispatch/dfile.c`, `libdispatch/ddispatch.c` + +**Table Names**: `UDF0_dispatch_table` through `UDF9_dispatch_table` + +**Models**: `NC_FORMATX_UDF0` through `NC_FORMATX_UDF9` + +**Mode Flags**: `NC_UDF0` through `NC_UDF9` + +### Overview + +NetCDF-C provides 10 user-defined format slots that allow developers to extend the library with custom file formats and storage backends. Each slot can be independently configured with its own dispatch table, initialization function, and optional magic number. 
+ +### UDF Slot Organization + +- **UDF0, UDF1**: Original slots, mode flags in lower 16 bits +- **UDF2-UDF9**: Extended slots, mode flags in upper 16 bits + +### Registration Methods + +#### Programmatic Registration + +Users can register custom formats via `nc_def_user_format()`: + +```c +int nc_def_user_format(int mode_flag, + NC_Dispatch* dispatch_table, + char* magic_number); +``` + +**Parameters**: + +- `mode_flag`: One of `NC_UDF0` through `NC_UDF9`, optionally combined with other mode flags (e.g., `NC_NETCDF4`) +- `dispatch_table`: Pointer to your `NC_Dispatch` structure +- `magic_number`: Optional magic number string (max `NC_MAX_MAGIC_NUMBER_LEN` bytes) for automatic format detection, or NULL + +**Example**: + +```c +extern NC_Dispatch my_format_dispatcher; + +// Register UDF in slot 0 with magic number +nc_def_user_format(NC_UDF0 | NC_NETCDF4, &my_format_dispatcher, "MYFORMAT"); + +// Now files with "MYFORMAT" magic number will use your dispatcher +int ncid; +nc_open("myfile.dat", 0, &ncid); // Auto-detects format +``` + +#### Query Registered UDFs + +Use `nc_inq_user_format()` to query registered formats: + +```c +int nc_inq_user_format(int mode_flag, + NC_Dispatch** dispatch_table, + char* magic_number); +``` + +**Example**: + +```c +NC_Dispatch *disp; +char magic[NC_MAX_MAGIC_NUMBER_LEN + 1]; +nc_inq_user_format(NC_UDF0, &disp, magic); +``` + +#### RC File Configuration + +UDFs can be automatically loaded from RC file configuration: + +**RC File Format** (`.ncrc`, `.daprc`, or `.dodsrc`): + +```ini +NETCDF.UDF.LIBRARY=/full/path/to/library.so +NETCDF.UDF.INIT=initialization_function_name +NETCDF.UDF.MAGIC=OPTIONAL_MAGIC_NUMBER +``` + +**Example**: + +```ini +# Load custom format in UDF0 +NETCDF.UDF0.LIBRARY=/usr/local/lib/libmyformat.so +NETCDF.UDF0.INIT=myformat_init +NETCDF.UDF0.MAGIC=MYFORMAT + +# Load scientific data format in UDF3 +NETCDF.UDF3.LIBRARY=/opt/scidata/lib/libscidata.so +NETCDF.UDF3.INIT=scidata_initialize +NETCDF.UDF3.MAGIC=SCIDATA 
+``` + +**RC File Requirements**: + +- `LIBRARY`: Must be a full absolute path to the shared library +- `INIT`: Name of the initialization function in the library +- `MAGIC`: Optional magic number for automatic format detection +- Both `LIBRARY` and `INIT` must be present; partial configuration is ignored with a warning + +**RC File Search Order**: + +1. `$HOME/.ncrc` +2. `$HOME/.daprc` +3. `$HOME/.dodsrc` +4. `$CWD/.ncrc` +5. `$CWD/.daprc` +6. `$CWD/.dodsrc` + +### Plugin Loading Process + +Plugins are loaded during library initialization (`nc_initialize()`): + +1. RC files are parsed +2. For each configured UDF slot: + - Library is loaded using `dlopen()` (Unix) or `LoadLibrary()` (Windows) + - Init function is located using `dlsym()` or `GetProcAddress()` + - Init function is called + - Init function must call `nc_def_user_format()` to register the dispatch table +3. Dispatch table ABI version is verified +4. Magic number (if provided) is registered for automatic format detection + +**Note**: Library handles are intentionally not closed; they remain loaded for the lifetime of the process. + +### Plugin Implementation Requirements + +**Dispatch Table Requirements**: + +- Dispatch table version must match `NC_DISPATCH_VERSION` +- Must implement all required operations or use pre-defined stubs +- Magic number max `NC_MAX_MAGIC_NUMBER_LEN` bytes (optional) + +**Initialization Function Requirements**: + +1. Must be exported (not static) +2. Must call `nc_def_user_format()` to register dispatch table +3. Should return `NC_NOERR` on success, error code on failure +4. 
Name must match RC file `INIT` key + +**Example Initialization Function**: + +```c +#include <netcdf.h> + +extern NC_Dispatch my_dispatcher; + +// Initialization function - must be exported +int my_plugin_init(void) { + int ret; + + // Register dispatch table with magic number + ret = nc_def_user_format(NC_UDF0 | NC_NETCDF4, + &my_dispatcher, + "MYFMT"); + if (ret != NC_NOERR) + return ret; + + // Additional initialization if needed + // ... + + return NC_NOERR; +} +``` + +### Pre-defined Dispatch Functions + +For operations your format doesn't support, use these pre-defined functions: + +**Read-only stubs** (`libdispatch/dreadonly.c`): + +- `NC_RO_create`, `NC_RO_redef`, `NC_RO__enddef`, `NC_RO_sync` +- `NC_RO_set_fill`, `NC_RO_def_dim`, `NC_RO_def_var`, `NC_RO_put_att` +- `NC_RO_put_vara`, `NC_RO_put_vars`, `NC_RO_put_varm` +- Returns `NC_EPERM` (operation not permitted) + +**Not NetCDF-4 stubs** (`libdispatch/dnotnc4.c`): + +- `NC_NOTNC4_def_grp`, `NC_NOTNC4_def_compound`, `NC_NOTNC4_def_vlen` +- `NC_NOTNC4_def_var_deflate`, `NC_NOTNC4_def_var_chunking` +- Returns `NC_ENOTNC4` (not a NetCDF-4 file) + +**Not NetCDF-3 stubs** (`libdispatch/dnotnc3.c`): + +- Returns `NC_ENOTNC3` (not a NetCDF-3 file) + +**No-op stubs**: + +- `NC_NOOP_*` - Returns `NC_NOERR` without doing anything + +**Default implementations** (`libdispatch/dvar.c`): + +- `NCDEFAULT_get_vars`, `NCDEFAULT_put_vars` - Strided access using get_vara/put_vara +- `NCDEFAULT_get_varm`, `NCDEFAULT_put_varm` - Mapped access using get_vars/put_vars + +**NetCDF-4 inquiry functions** (`libsrc4/`): + +- `NC4_inq`, `NC4_inq_type`, `NC4_inq_dimid`, `NC4_inq_varid` +- Use internal metadata model for inquiry operations + +### Example Minimal Dispatch Table + +```c +#include "netcdf_dispatch.h" + +static NC_Dispatch my_dispatcher = { + NC_FORMATX_UDF0, /* Use UDF slot 0 */ + NC_DISPATCH_VERSION, /* Current ABI version */ + + NC_RO_create, /* Read-only: use predefined function */ + my_open, /* Custom open function */ + 
NC_RO_redef, + NC_RO__enddef, + NC_RO_sync, + my_abort, + my_close, + NC_RO_set_fill, + my_inq_format, + my_inq_format_extended, + + /* Inquiry functions - can use NC4_* defaults */ + NC4_inq, + NC4_inq_type, + NC4_inq_dimid, + NC4_inq_varid, + + /* Variable I/O */ + my_get_vara, + NC_RO_put_vara, /* Read-only */ + NCDEFAULT_get_vars, /* Use default strided implementation */ + NC_RO_put_vars, + NCDEFAULT_get_varm, /* Use default mapped implementation */ + NC_RO_put_varm, + + /* NetCDF-4 features not supported */ + NC_NOTNC4_def_grp, + NC_NOTNC4_def_compound, + NC_NOTNC4_def_vlen, + NC_NOTNC4_def_var_deflate, + NC_NOTNC4_def_var_chunking, + + /* ... continue for all ~70 function pointers ... */ +}; +``` + +### Magic Numbers and Format Detection + +Magic numbers enable automatic format detection when opening files. + +**How Magic Numbers Work**: + +1. When `nc_open()` is called without a specific format flag +2. The file's first bytes are read +3. They are compared against all registered magic numbers (built-in and user-defined) +4. 
If a match is found, the corresponding dispatcher is used + +**Magic Number Best Practices**: + +- Use unique, distinctive strings (4-8 bytes recommended) +- Place at the beginning of your file format +- Avoid conflicts with existing formats: + - NetCDF-3: "CDF\001", "CDF\002", "CDF\005" + - HDF5/NetCDF-4: "\211HDF\r\n\032\n" +- Maximum length: `NC_MAX_MAGIC_NUMBER_LEN` bytes + +### Platform Considerations + +**Unix/Linux/macOS**: + +- Shared libraries: `.so` extension +- Dynamic loading: `dlopen()` and `dlsym()` +- Library paths: Use absolute paths or ensure libraries are in `LD_LIBRARY_PATH` + +**Windows**: + +- Shared libraries: `.dll` extension +- Dynamic loading: `LoadLibrary()` and `GetProcAddress()` +- Library paths: Use absolute paths or ensure DLLs are in system `PATH` + +**Building Plugins**: + +Unix: + +```bash +gcc -shared -fPIC -o libmyplugin.so myplugin.c -lnetcdf +``` + +Windows: + +```batch +cl /LD myplugin.c netcdf.lib +``` + +### Security Considerations + +- **Full paths required**: RC files must specify absolute library paths to prevent path injection attacks +- **Code execution**: Plugins execute arbitrary code in your process; only load trusted libraries +- **Validation**: The library verifies dispatch table ABI version but cannot validate plugin behavior +- **Permissions**: Ensure plugin libraries have appropriate file permissions + +### Common Errors + +**NC_EINVAL: Invalid dispatch table version** + +- Cause: Plugin was compiled against a different version of NetCDF-C +- Solution: Recompile plugin against current NetCDF-C version + +**Plugin not loaded (no error)** + +- Cause: Partial RC configuration (LIBRARY without INIT, or vice versa) +- Solution: Check that both LIBRARY and INIT keys are present in RC file + +**Library not found** + +- Cause: Incorrect path in NETCDF.UDF\*.LIBRARY +- Solution: Use absolute path; verify file exists and has correct permissions + +**Init function not found** + +- Cause: Function name mismatch or missing 
export +- Solution: Verify function name matches INIT key; ensure function is exported (not static) + +### Testing UDFs + +**Enable Logging**: + +```bash +export NC_LOG_LEVEL=3 +./myprogram +``` + +**Verify RC File is Read**: + +```bash +echo "NETCDF.UDF0.LIBRARY=/tmp/test.so" > ~/.ncrc +echo "NETCDF.UDF0.INIT=test_init" >> ~/.ncrc +# Run program and check for warnings about missing library +``` + +**Check Plugin Exports** (Unix): + +```bash +nm -D libmyplugin.so | grep init +``` + +**Check Plugin Exports** (Windows): + +```batch +dumpbin /EXPORTS myplugin.dll +``` + +## Dispatch Table Selection + +**File**: `libdispatch/dinfermodel.c` + +### Selection Logic + +1. **Magic Number Detection**: + - CDF1: `0x43 0x44 0x46 0x01` ("CDF\001") + - CDF2: `0x43 0x44 0x46 0x02` ("CDF\002") + - CDF5: `0x43 0x44 0x46 0x05` ("CDF\005") + - HDF5: `0x89 0x48 0x44 0x46 0x0d 0x0a 0x1a 0x0a` + - User-defined: Custom magic numbers + +2. **URL Scheme Parsing**: + - `http://`, `https://` → DAP2 or DAP4 + - `s3://` → Zarr with S3 backend + - `file://` → Local file (check magic) + +3. **Mode Flags**: + - `NC_NETCDF4` → HDF5 or Zarr + - `NC_CLASSIC_MODEL` → NetCDF-3 API with NetCDF-4 file + - `NC_64BIT_OFFSET` → CDF2 + - `NC_64BIT_DATA` → CDF5 + - `NC_ZARR` → Zarr format + +4. 
**File Extension** (hints): + - `.nc` → NetCDF-3 or NetCDF-4 + - `.nc4` → NetCDF-4/HDF5 + - `.h5`, `.hdf5` → HDF5 + - `.zarr` → Zarr + +### Dispatch Table Registration + +**Initialization** (called at library startup): + +```c +NCDISPATCH_initialize() + → NC3_initialize() // Sets NC3_dispatch_table + → NC_HDF5_initialize() // Sets HDF5_dispatch_table + → NCZ_initialize() // Sets NCZ_dispatch_table + → NCD2_initialize() // Sets NCD2_dispatch_table + → NCD4_initialize() // Sets NCD4_dispatch_table +``` + +## Function Pointer Conventions + +### Return Values + +- `NC_NOERR` (0) on success +- Negative error codes on failure +- `NC_ENOTNC4` for unsupported NetCDF-4 features +- `NC_EINVAL` for invalid parameters + +### Common Stubs + +- `NC_NOOP_*` - No-operation stubs (return `NC_NOERR`) +- `NC_NOTNC4_*` - Not-NetCDF-4 stubs (return `NC_ENOTNC4`) +- `NCDEFAULT_*` - Default implementations (in libdispatch) + +### NCDEFAULT Implementations + +**File**: `libdispatch/dvar.c` + +- `NCDEFAULT_get_vars()` - Implements strided access using get_vara +- `NCDEFAULT_put_vars()` - Implements strided writes using put_vara +- `NCDEFAULT_get_varm()` - Implements mapped access using get_vars +- `NCDEFAULT_put_varm()` - Implements mapped writes using put_vars + +## Dispatch Version History + +- **Version 1**: Original dispatch table +- **Version 2**: Added filter operations +- **Version 3**: Replaced filteractions with specific filter functions +- **Version 4**: Added quantization support +- **Version 5**: Current version (additional enhancements) + +**Compatibility**: Dispatch tables must match the library's dispatch version exactly. + +## Testing Dispatch Tables + +Each format has its own test suite: + +- `nc_test/` - NetCDF-3 tests +- `nc_test4/` - NetCDF-4/HDF5 tests +- `nczarr_test/` - Zarr tests +- `ncdap_test/` - DAP2 tests + +**Dispatch testing**: Tests verify that operations route correctly and return appropriate errors for unsupported features. 
diff --git a/windsurf-harnett/skills/netcdf-architecture/references/EXAMPLES.md b/windsurf-harnett/skills/netcdf-architecture/references/EXAMPLES.md new file mode 100644 index 000000000..0c3aa5fe8 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/EXAMPLES.md @@ -0,0 +1,499 @@ +# NetCDF-C Programming Examples and Patterns + +This reference provides practical examples and common programming patterns for working with NetCDF-C, based on official documentation and example programs. + +## Overview + +NetCDF-C provides example programs demonstrating: + +- Basic file creation and reading (NetCDF-3) +- Enhanced features (NetCDF-4: groups, compression, user-defined types) +- Real-world patterns (meteorological data, time series, multidimensional arrays) + +**Example Location**: `examples/C/` directory in NetCDF-C source + +**Documentation**: https://docs.unidata.ucar.edu/netcdf-c/current/examples1.html + +## Basic NetCDF-3 Examples + +### Example 1: simple_xy - Minimal File Operations + +**Files**: `simple_xy_wr.c`, `simple_xy_rd.c` + +**Purpose**: Demonstrates absolute minimum operations to create and read a NetCDF file. 
+ +**What it creates**: 2D array (6x12) with dimensions "x" and "y", variable "data" + +**Key Pattern - File Creation**: + +```c +int ncid, x_dimid, y_dimid, varid; +int dimids[2]; + +// Create file (NC_CLOBBER overwrites existing) +nc_create("simple_xy.nc", NC_CLOBBER, &ncid); + +// Define dimensions +nc_def_dim(ncid, "x", NX, &x_dimid); +nc_def_dim(ncid, "y", NY, &y_dimid); + +// Define variable with dimensions +dimids[0] = x_dimid; +dimids[1] = y_dimid; +nc_def_var(ncid, "data", NC_INT, 2, dimids, &varid); + +// End define mode (required before writing data) +nc_enddef(ncid); + +// Write data +nc_put_var_int(ncid, varid, &data[0][0]); + +// Close file +nc_close(ncid); +``` + +**Key Pattern - File Reading**: + +```c +int ncid, varid; +int data_in[NX][NY]; + +// Open file for reading +nc_open("simple_xy.nc", NC_NOWRITE, &ncid); + +// Get variable ID by name +nc_inq_varid(ncid, "data", &varid); + +// Read entire variable +nc_get_var_int(ncid, varid, &data_in[0][0]); + +// Close file +nc_close(ncid); +``` + +### Example 2: sfc_pres_temp - Adding Metadata + +**Files**: `sfc_pres_temp_wr.c`, `sfc_pres_temp_rd.c` + +**Purpose**: Demonstrates adding attributes and coordinate variables (CF conventions). 
+ +**What it creates**: Surface temperature and pressure on 6x12 lat/lon grid with metadata + +**Key Pattern - Adding Attributes**: + +```c +// Define variable +nc_def_var(ncid, "temperature", NC_FLOAT, 2, dimids, &temp_varid); + +// Add units attribute +nc_put_att_text(ncid, temp_varid, "units", 7, "celsius"); + +// Add global attribute +nc_put_att_text(ncid, NC_GLOBAL, "title", 24, "Surface Temperature Data"); +``` + +**Key Pattern - Coordinate Variables**: + +```c +// Define latitude dimension +nc_def_dim(ncid, "latitude", NLAT, &lat_dimid); + +// Define latitude coordinate variable (same name as dimension) +nc_def_var(ncid, "latitude", NC_FLOAT, 1, &lat_dimid, &lat_varid); +nc_put_att_text(ncid, lat_varid, "units", 13, "degrees_north"); + +// Write coordinate values +float lats[NLAT] = {25, 30, 35, 40, 45, 50}; +nc_put_var_float(ncid, lat_varid, lats); +``` + +**Best Practice**: Coordinate variables should have the same name as their dimension and include units. + +### Example 3: pres_temp_4D - Unlimited Dimensions and Time Series + +**Files**: `pres_temp_4D_wr.c`, `pres_temp_4D_rd.c` + +**Purpose**: Demonstrates 4D data with unlimited time dimension, writing one timestep at a time. + +**What it creates**: Temperature and pressure with dimensions [time, level, lat, lon] + +**Key Pattern - Unlimited Dimension**: + +```c +// Define unlimited dimension (use NC_UNLIMITED for size) +nc_def_dim(ncid, "time", NC_UNLIMITED, &time_dimid); + +// Define variable with unlimited dimension first +int dimids[4] = {time_dimid, level_dimid, lat_dimid, lon_dimid}; +nc_def_var(ncid, "temperature", NC_FLOAT, 4, dimids, &temp_varid); +``` + +**Key Pattern - Writing Time Steps**: + +```c +// Write one time step at a time +for (int rec = 0; rec < NREC; rec++) { + size_t start[4] = {rec, 0, 0, 0}; // Start at this time step + size_t count[4] = {1, NLVL, NLAT, NLON}; // Write one time slice + + // Prepare data for this time step + float temp_out[NLVL][NLAT][NLON]; + // ... 
fill temp_out ... + + // Write hyperslab + nc_put_vara_float(ncid, temp_varid, start, count, &temp_out[0][0][0]); +} +``` + +**Key Pattern - Reading Time Steps**: + +```c +// Read one time step +size_t start[4] = {rec, 0, 0, 0}; +size_t count[4] = {1, NLVL, NLAT, NLON}; +nc_get_vara_float(ncid, temp_varid, start, count, &temp_in[0][0][0]); +``` + +## NetCDF-4 Enhanced Examples + +### Example 4: simple_nc4 - Groups and User-Defined Types + +**Files**: `simple_nc4_wr.c`, `simple_nc4_rd.c` + +**Purpose**: Demonstrates NetCDF-4 groups and compound types. + +**What it creates**: Two groups with different data types (uint64 and compound) + +**Key Pattern - Creating Groups**: + +```c +int ncid, grp1_id, grp2_id; + +// Create NetCDF-4 file +nc_create("simple_nc4.nc", NC_NETCDF4, &ncid); + +// Create groups +nc_def_grp(ncid, "grp1", &grp1_id); +nc_def_grp(ncid, "grp2", &grp2_id); + +// Define variable in group +nc_def_var(grp1_id, "data", NC_UINT64, 2, dimids, &varid); +``` + +**Key Pattern - Compound Types**: + +```c +typedef struct { + int i1; + int i2; +} compound_data; + +nc_type compound_typeid; + +// Define compound type +nc_def_compound(grp2_id, sizeof(compound_data), "compound_t", &compound_typeid); +nc_insert_compound(grp2_id, compound_typeid, "i1", + NC_COMPOUND_OFFSET(compound_data, i1), NC_INT); +nc_insert_compound(grp2_id, compound_typeid, "i2", + NC_COMPOUND_OFFSET(compound_data, i2), NC_INT); + +// Use compound type for variable +nc_def_var(grp2_id, "data", compound_typeid, 2, dimids, &varid); +``` + +### Example 5: simple_xy_nc4 - Compression and Chunking + +**Files**: `simple_xy_nc4_wr.c`, `simple_xy_nc4_rd.c` + +**Purpose**: Demonstrates chunking, compression, and checksums (HDF5 features). 
+ +**Key Pattern - Chunking**: + +```c +// Define variable +nc_def_var(ncid, "data", NC_INT, 2, dimids, &varid); + +// Set chunking (required for compression) +size_t chunks[2] = {4, 4}; // Chunk size for each dimension +nc_def_var_chunking(ncid, varid, NC_CHUNKED, chunks); +``` + +**Key Pattern - Compression**: + +```c +// Enable deflate compression (level 1-9, 9 = maximum compression) +int shuffle = NC_SHUFFLE; // Shuffle filter improves compression +int deflate = 1; // Enable deflate +int deflate_level = 5; // Compression level +nc_def_var_deflate(ncid, varid, shuffle, deflate, deflate_level); +``` + +**Key Pattern - Checksums**: + +```c +// Enable fletcher32 checksum for data integrity +nc_def_var_fletcher32(ncid, varid, NC_FLETCHER32); +``` + +### Example 6: filter_example - Custom Filters + +**Files**: `filter_example.c` + +**Purpose**: Demonstrates using custom compression filters (e.g., bzip2). + +**Key Pattern - Custom Filter**: + +```c +// Define variable with chunking (required for filters) +nc_def_var(ncid, "data", NC_INT, 2, dimids, &varid); +size_t chunks[2] = {100, 100}; +nc_def_var_chunking(ncid, varid, NC_CHUNKED, chunks); + +// Apply custom filter (bzip2 example) +unsigned int filter_id = 307; // Bzip2 filter ID +size_t nparams = 1; +unsigned int params[1] = {9}; // Compression level +nc_def_var_filter(ncid, varid, filter_id, nparams, params); +``` + +## Common Programming Patterns + +### Pattern 1: Error Handling + +**Always check return codes**: + +```c +int retval; + +if ((retval = nc_create(FILE_NAME, NC_CLOBBER, &ncid))) + ERR(retval); + +// Or use macro +#define ERR(e) {printf("Error: %s\n", nc_strerror(e)); return 2;} +``` + +### Pattern 2: Inquiry Functions + +**Get file information without prior knowledge**: + +```c +int ncid, ndims, nvars, ngatts, unlimdimid; + +// Open file +nc_open("file.nc", NC_NOWRITE, &ncid); + +// Get file metadata +nc_inq(ncid, &ndims, &nvars, &ngatts, &unlimdimid); + +// Inquire about specific dimension +char 
dim_name[NC_MAX_NAME+1]; +size_t dim_len; +nc_inq_dim(ncid, dimid, dim_name, &dim_len); + +// Inquire about variable +char var_name[NC_MAX_NAME+1]; +nc_type var_type; +int var_ndims, var_dimids[NC_MAX_VAR_DIMS], var_natts; +nc_inq_var(ncid, varid, var_name, &var_type, &var_ndims, + var_dimids, &var_natts); +``` + +### Pattern 3: Subsetting Data (Hyperslabs) + +**Read/write portions of arrays**: + +```c +// Read a subset: time=5, all levels, lat 10-20, lon 30-40 +size_t start[4] = {5, 0, 10, 30}; +size_t count[4] = {1, NLVL, 10, 10}; +float subset[NLVL][10][10]; + +nc_get_vara_float(ncid, varid, start, count, &subset[0][0][0]); +``` + +### Pattern 4: Strided Access + +**Read every Nth element**: + +```c +// Read every 2nd element in each dimension +size_t start[2] = {0, 0}; +size_t count[2] = {NX/2, NY/2}; +ptrdiff_t stride[2] = {2, 2}; // Skip every other element + +nc_get_vars_float(ncid, varid, start, count, stride, data); +``` + +### Pattern 5: Fill Values + +**Handle missing data**: + +```c +// Set custom fill value +float fill_value = -999.0; +nc_def_var_fill(ncid, varid, NC_FILL, &fill_value); + +// Disable fill values (for performance) +nc_def_var_fill(ncid, varid, NC_NOFILL, NULL); +``` + +### Pattern 6: Parallel I/O (NetCDF-4 with HDF5) + +**MPI parallel access**: + +```c +#include + +// Initialize MPI +MPI_Init(&argc, &argv); +MPI_Comm_size(MPI_COMM_WORLD, &nprocs); +MPI_Comm_rank(MPI_COMM_WORLD, &rank); + +// Create file with parallel access +nc_create_par("parallel.nc", NC_NETCDF4|NC_MPIIO, + MPI_COMM_WORLD, MPI_INFO_NULL, &ncid); + +// Set collective access for variable +nc_var_par_access(ncid, varid, NC_COLLECTIVE); + +// Each process writes its portion +size_t start[1] = {rank * chunk_size}; +size_t count[1] = {chunk_size}; +nc_put_vara_float(ncid, varid, start, count, local_data); + +// Close and finalize +nc_close(ncid); +MPI_Finalize(); +``` + +## Best Practices from Examples + +### File Creation + +1. 
**Always use NC_CLOBBER or NC_NOCLOBBER** to control overwrite behavior +2. **Classic CDF-1 is the default**: `nc_create(path, NC_CLOBBER, &ncid)` creates a classic file — no format flag needed +3. **NC_CLASSIC_MODEL is only for NetCDF-4**: Use `NC_NETCDF4 | NC_CLASSIC_MODEL` to get HDF5 storage with classic data model restrictions (no groups, no user-defined types). Do NOT use `NC_CLASSIC_MODEL` alone for classic CDF-1 files. +4. **End define mode** with `nc_enddef()` before writing data +5. **Close files** with `nc_close()` to ensure data is flushed + +### Dimensions + +1. **Unlimited dimension first** in dimension order for best performance +2. **One unlimited dimension** in NetCDF-3, multiple allowed in NetCDF-4 +3. **Coordinate variables** should match dimension names + +### Variables + +1. **Add units attribute** to all data variables (CF convention) +2. **Use appropriate data types** (NC_FLOAT for most scientific data) +3. **Enable chunking** before compression or filters + +### Attributes + +1. **Use standard names** from CF conventions when possible +2. **Add global attributes** for file-level metadata (title, history, etc.) +3. **Document missing values** with \_FillValue or missing_value attributes + +### Performance + +1. **Use chunking** for large datasets accessed in subsets +2. **Enable compression** to reduce file size (deflate level 5 is good default) +3. **Write contiguously** when possible (avoid random access) +4. **Use collective I/O** in parallel applications + +### NetCDF-4 Features + +1. **Use groups** to organize related variables +2. **Compound types** for structured data (like C structs) +3. **Compression** is transparent to readers +4. 
**Checksums** ensure data integrity + +## Tutorial Topics + +The NetCDF-C tutorial covers these key areas: + +### Data Model + +- **Classic Model**: Dimensions, variables, attributes +- **Enhanced Model**: Groups, user-defined types, multiple unlimited dimensions +- **Unlimited Dimensions**: Growing datasets (time series) +- **Strings**: NC_STRING type in NetCDF-4 + +### File Operations + +- **Creating Files**: Define mode vs data mode +- **Reading Known Structure**: When you know the schema +- **Reading Unknown Structure**: Generic file inspection +- **Subsets**: Hyperslabs, strides, mapped access + +### Advanced Topics + +- **Error Handling**: Return codes and nc_strerror() +- **HDF5 Interoperability**: Reading HDF5 files as NetCDF +- **Parallel I/O**: MPI-based parallel access +- **Fill Values**: Handling missing/unwritten data + +## Command-Line Tools + +### ncdump - Examine Files + +```bash +# View file structure +ncdump -h file.nc + +# View data +ncdump file.nc + +# View header plus coordinate variable data +ncdump -c file.nc +``` + +### ncgen - Generate Files from CDL + +```bash +# Create NetCDF file from CDL +ncgen -o output.nc input.cdl + +# Create NetCDF-4 file +ncgen -k nc4 -o output.nc input.cdl +``` + +### nccopy - Copy and Convert + +```bash +# Convert NetCDF-3 to NetCDF-4 with compression +nccopy -k nc4 -d 5 input.nc output.nc + +# Rechunk file +nccopy -c "var:10,20,30" input.nc output.nc +``` + +## Example File Locations + +In the NetCDF-C source tree: + +- `examples/C/` - C examples +- `examples/CDL/` - CDL files for ncgen +- `nc_test/` - Test programs (also good examples) +- `nc_test4/` - NetCDF-4 test programs + +## Additional Resources + +**Official Documentation**: + +- Tutorial: https://docs.unidata.ucar.edu/netcdf-c/current/tutorial_8dox.html +- Examples: https://docs.unidata.ucar.edu/netcdf-c/current/examples1.html +- API Reference: https://docs.unidata.ucar.edu/netcdf-c/current/modules.html + +**CF Conventions**: + +- http://cfconventions.org/ - Climate and Forecast 
metadata conventions + +**Best Practices**: + +- Use coordinate variables for dimensions +- Include units attributes +- Add descriptive global attributes +- Follow CF conventions when applicable +- Enable compression for large datasets +- Use chunking for subsetting access patterns diff --git a/windsurf-harnett/skills/netcdf-architecture/references/FORTRAN-INTERFACE.md b/windsurf-harnett/skills/netcdf-architecture/references/FORTRAN-INTERFACE.md new file mode 100644 index 000000000..67d32bba9 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/FORTRAN-INTERFACE.md @@ -0,0 +1,389 @@ +# NetCDF Fortran 90 Interface Guide + +## Overview + +The NetCDF Fortran 90 interface provides a modern Fortran API to the NetCDF library. It wraps the underlying C library with type-safe Fortran 90 modules and interfaces. + +**Primary Module**: `netcdf` (defined in `netcdf.f90`) + +**Documentation**: https://docs.unidata.ucar.edu/netcdf-fortran/current/f90_The-NetCDF-Fortran-90-Interface-Guide.html + +## Key Concepts + +### Operating Modes + +NetCDF datasets operate in one of two modes: + +- **Define Mode**: Create dimensions, variables, and attributes. Cannot read/write variable data. +- **Data Mode**: Access data and modify existing attributes. Cannot create new dimensions, variables, or attributes. + +### Identifiers + +- **NetCDF ID**: Small non-negative integer returned when opening/creating a dataset +- **Variable ID**: Integer identifying a variable (1, 2, 3... in order of definition) +- **Dimension ID**: Integer identifying a dimension +- **Attribute**: Identified by name and associated variable/dataset + +## Common Usage Patterns + +### 1. Creating a NetCDF Dataset + +```fortran +use netcdf +implicit none + +integer :: ncid, status +integer :: x_dimid, y_dimid, time_dimid +integer :: temp_varid + +! Create dataset +status = nf90_create("output.nc", NF90_CLOBBER, ncid) +if (status /= NF90_NOERR) call handle_error(status) + +! 
Define dimensions (in define mode by default) +status = nf90_def_dim(ncid, "x", 100, x_dimid) +status = nf90_def_dim(ncid, "y", 50, y_dimid) +status = nf90_def_dim(ncid, "time", NF90_UNLIMITED, time_dimid) + +! Define variables +status = nf90_def_var(ncid, "temperature", NF90_FLOAT, & + [x_dimid, y_dimid, time_dimid], temp_varid) + +! Add attributes +status = nf90_put_att(ncid, temp_varid, "units", "celsius") + +! End define mode, enter data mode +status = nf90_enddef(ncid) + +! Write data +status = nf90_put_var(ncid, temp_varid, data_array) + +! Close dataset +status = nf90_close(ncid) +``` + +### 2. Reading a Dataset with Known Names + +```fortran +use netcdf +implicit none + +integer :: ncid, varid, status +real, allocatable :: data(:,:,:) + +! Open dataset for reading +status = nf90_open("input.nc", NF90_NOWRITE, ncid) + +! Get variable ID from name +status = nf90_inq_varid(ncid, "temperature", varid) + +! Read data +status = nf90_get_var(ncid, varid, data) + +! Close dataset (optional for read-only) +status = nf90_close(ncid) +``` + +### 3. Reading a Dataset with Unknown Names + +```fortran +use netcdf +implicit none + +integer :: ncid, status +integer :: ndims, nvars, ngatts, unlimdimid +integer :: i, varid +character(len=NF90_MAX_NAME) :: varname + +! Open dataset +status = nf90_open("input.nc", NF90_NOWRITE, ncid) + +! Inquire about dataset contents +status = nf90_inquire(ncid, ndims, nvars, ngatts, unlimdimid) + +! 
Loop through variables +do i = 1, nvars + status = nf90_inquire_variable(ncid, i, name=varname) + print *, "Variable: ", trim(varname) +end do + +status = nf90_close(ncid) +``` + +## Core Function Categories + +### Dataset Operations + +- **`NF90_CREATE`**: Create a new NetCDF dataset +- **`NF90_OPEN`**: Open an existing dataset +- **`NF90_CLOSE`**: Close an open dataset +- **`NF90_REDEF`**: Enter define mode +- **`NF90_ENDDEF`**: Exit define mode, enter data mode +- **`NF90_SYNC`**: Synchronize dataset to disk +- **`NF90_ABORT`**: Close dataset without saving changes +- **`NF90_INQUIRE`**: Get information about dataset +- **`NF90_SET_FILL`**: Set fill mode for variables + +### Dimension Operations + +- **`NF90_DEF_DIM`**: Define a dimension +- **`NF90_INQ_DIMID`**: Get dimension ID from name +- **`NF90_INQUIRE_DIMENSION`**: Get dimension information +- **`NF90_RENAME_DIM`**: Rename a dimension + +### Variable Operations + +- **`NF90_DEF_VAR`**: Define a variable +- **`NF90_INQ_VARID`**: Get variable ID from name +- **`NF90_INQUIRE_VARIABLE`**: Get variable information +- **`NF90_PUT_VAR`**: Write data to a variable +- **`NF90_GET_VAR`**: Read data from a variable +- **`NF90_RENAME_VAR`**: Rename a variable +- **`NF90_DEF_VAR_FILL`**: Define fill parameters +- **`NF90_INQ_VAR_FILL`**: Get fill parameters +- **`NF90_DEF_VAR_FILTER`**: Define filter/compression +- **`NF90_INQ_VAR_FILTER`**: Get filter information + +### Attribute Operations + +- **`NF90_PUT_ATT`**: Write an attribute +- **`NF90_GET_ATT`**: Read an attribute +- **`NF90_INQ_ATTNAME`**: Get attribute name from number +- **`NF90_INQUIRE_ATTRIBUTE`**: Get attribute information +- **`NF90_RENAME_ATT`**: Rename an attribute +- **`NF90_DEL_ATT`**: Delete an attribute +- **`NF90_COPY_ATT`**: Copy attribute to another variable + +### Group Operations (NetCDF-4) + +- **`NF90_DEF_GRP`**: Create a group +- **`NF90_INQ_NCID`**: Get group ID from name +- **`NF90_INQ_GRPS`**: Get child group IDs +- 
**`NF90_INQ_GRPNAME`**: Get group name +- **`NF90_RENAME_GRP`**: Rename a group + +### User-Defined Types (NetCDF-4) + +- **`NF90_DEF_COMPOUND`**: Define compound type +- **`NF90_DEF_VLEN`**: Define variable-length type +- **`NF90_DEF_OPAQUE`**: Define opaque type +- **`NF90_DEF_ENUM`**: Define enumeration type + +## Data Types + +### NetCDF External Types and Fortran Constants + +| NetCDF Type | Fortran 90 Constant | Bits | +| ----------- | ------------------- | ---- | +| byte | NF90_BYTE | 8 | +| char | NF90_CHAR | 8 | +| short | NF90_SHORT | 16 | +| int | NF90_INT | 32 | +| float | NF90_FLOAT | 32 | +| double | NF90_DOUBLE | 64 | +| ubyte | NF90_UBYTE | 8 | +| ushort | NF90_USHORT | 16 | +| uint | NF90_UINT | 32 | +| int64 | NF90_INT64 | 64 | +| uint64 | NF90_UINT64 | 64 | +| string | NF90_STRING | - | + +## Variable I/O Flexibility + +The `NF90_PUT_VAR` and `NF90_GET_VAR` functions support flexible data access: + +### Basic Usage + +```fortran +! Write entire array +status = nf90_put_var(ncid, varid, data_array) + +! Read entire array +status = nf90_get_var(ncid, varid, data_array) +``` + +### Subsetting with start/count + +```fortran +! Write a subset starting at index (10,20) with size (5,10) +status = nf90_put_var(ncid, varid, data_array, & + start=[10,20], count=[5,10]) +``` + +### Strided Access + +```fortran +! Read every other element +status = nf90_get_var(ncid, varid, data_array, & + start=[1,1], count=[50,25], stride=[2,2]) +``` + +### Mapped Access + +```fortran +! 
Non-contiguous memory mapping +status = nf90_put_var(ncid, varid, data_array, & + start=[1,1], count=[10,10], map=[1,100]) +``` + +## Error Handling + +All NetCDF Fortran functions return an integer status code: + +```fortran +integer :: status + +status = nf90_open("file.nc", NF90_NOWRITE, ncid) +if (status /= NF90_NOERR) then + print *, trim(nf90_strerror(status)) + stop "Error opening file" +end if +``` + +### Common Error Codes + +- **`NF90_NOERR`**: No error +- **`NF90_EBADID`**: Invalid NetCDF ID +- **`NF90_ENOTVAR`**: Variable not found +- **`NF90_EINDEFINE`**: Operation not allowed in define mode +- **`NF90_ENOTINDEFINE`**: Operation requires define mode +- **`NF90_EINVAL`**: Invalid argument + +### Error Message Function + +**`NF90_STRERROR(status)`**: Returns descriptive error message string + +## Mode Flags + +### File Creation/Opening Modes + +- **`NF90_NOWRITE`**: Open read-only +- **`NF90_WRITE`**: Open for writing +- **`NF90_CLOBBER`**: Overwrite existing file +- **`NF90_NOCLOBBER`**: Fail if file exists +- **`NF90_SHARE`**: Disable buffering for immediate writes +- **`NF90_NETCDF4`**: Create NetCDF-4/HDF5 file +- **`NF90_CLASSIC_MODEL`**: Use classic data model with NetCDF-4 +- **`NF90_64BIT_OFFSET`**: Use CDF-2 format (large file support) +- **`NF90_64BIT_DATA`**: Use CDF-5 format (large variable support) + +### Variable Storage Options (NetCDF-4) + +- **`NF90_CHUNKED`**: Use chunked storage +- **`NF90_CONTIGUOUS`**: Use contiguous storage +- **`NF90_COMPACT`**: Use compact storage (small variables) + +### Compression Options (NetCDF-4) + +```fortran +! Define variable with compression +status = nf90_def_var(ncid, "data", NF90_FLOAT, dimids, varid, & + deflate_level=6, shuffle=.true.) +``` + +## Parallel I/O (NetCDF-4 with MPI) + +```fortran +use netcdf +use mpi +implicit none + +integer :: ncid, varid, status +integer :: comm, info + +! Initialize MPI +call MPI_Init(ierr) +comm = MPI_COMM_WORLD +info = MPI_INFO_NULL + +! 
Create parallel file +status = nf90_create_par("parallel.nc", & + IOR(NF90_NETCDF4, NF90_MPIIO), & + comm, info, ncid) + +! Set collective access +status = nf90_var_par_access(ncid, varid, NF90_COLLECTIVE) + +! Each process writes its portion +status = nf90_put_var(ncid, varid, local_data, & + start=[my_start], count=[my_count]) + +status = nf90_close(ncid) +call MPI_Finalize(ierr) +``` + +## Best Practices + +### 1. Always Check Return Status + +```fortran +if (status /= NF90_NOERR) call handle_error(status) +``` + +### 2. Close Files Explicitly + +```fortran +status = nf90_close(ncid) +``` + +### 3. Use NF90_SYNC for Critical Data + +```fortran +! Ensure data is written to disk +status = nf90_sync(ncid) +``` + +### 4. Minimize Define Mode Transitions + +Define all dimensions, variables, and attributes before entering data mode to avoid performance overhead. + +### 5. Use Chunking for Large Arrays (NetCDF-4) + +```fortran +status = nf90_def_var(ncid, "data", NF90_FLOAT, dimids, varid, & + chunksizes=[100,100,1]) +``` + +### 6. Enable Compression for Large Datasets + +```fortran +status = nf90_def_var(ncid, "data", NF90_FLOAT, dimids, varid, & + deflate_level=4, shuffle=.true.) +``` + +## Relationship to C Library + +The Fortran 90 interface is a wrapper around the NetCDF-C library: + +1. **Module**: `netcdf.f90` provides Fortran 90 interfaces +2. **Binding**: Calls C functions via ISO_C_BINDING +3. **Naming**: Fortran functions use `NF90_` prefix (C uses `nc_`) +4. **Types**: Fortran constants map to C types +5. 
**Arrays**: Fortran column-major order vs C row-major (handled internally) + +## Integration with NetCDF-C Architecture + +The Fortran interface sits on top of the C library dispatch architecture: + +``` +Fortran 90 Application + ↓ +netcdf.f90 module (NF90_* functions) + ↓ +ISO_C_BINDING layer + ↓ +NetCDF-C API (nc_* functions) + ↓ +libdispatch (dispatch tables) + ↓ +Format-specific implementations +(NC3, HDF5, Zarr, DAP) +``` + +All format support, dispatch routing, and I/O operations are handled by the underlying C library. + +## Additional Resources + +- **Main Guide**: https://docs.unidata.ucar.edu/netcdf-fortran/current/ +- **API Reference**: https://docs.unidata.ucar.edu/netcdf-fortran/current/f90_The-NetCDF-Fortran-90-Interface-Guide.html +- **Examples**: https://docs.unidata.ucar.edu/netcdf-fortran/current/examples.html diff --git a/windsurf-harnett/skills/netcdf-architecture/references/UDF-PLUGINS.md b/windsurf-harnett/skills/netcdf-architecture/references/UDF-PLUGINS.md new file mode 100644 index 000000000..de353e882 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-architecture/references/UDF-PLUGINS.md @@ -0,0 +1,566 @@ +# NetCDF-C User-Defined Format (UDF) Plugin Development + +This reference provides comprehensive guidance for developing UDF plugins for NetCDF-C. + +## Overview + +User-Defined Formats (UDFs) allow developers to extend NetCDF-C with custom file formats and storage backends through a plugin system. The library provides 10 independent UDF slots (UDF0-UDF9) that can be registered programmatically or via RC file configuration. + +## Plugin Architecture + +A UDF plugin consists of three main components: + +1. **Dispatch Table**: `NC_Dispatch` structure with function pointers implementing the netCDF API +2. **Initialization Function**: Called during plugin loading to register the dispatch table +3. **Format-Specific Code**: Implementation of file I/O and data operations + +## Plugin Lifecycle + +1. 
Library initialization (`nc_initialize()`) +2. RC file parsing (if configured) +3. Plugin library loading (`dlopen`/`LoadLibrary`) +4. Init function location (`dlsym`/`GetProcAddress`) +5. Init function execution +6. Dispatch table registration via `nc_def_user_format()` +7. Plugin remains loaded for process lifetime + +## Dispatch Table Implementation + +### Required Fields + +```c +typedef struct NC_Dispatch { + int model; /* NC_FORMATX_UDF0 through NC_FORMATX_UDF9 */ + int dispatch_version; /* Must be NC_DISPATCH_VERSION */ + + /* Function pointers for all netCDF operations (~70 total) */ + int (*create)(...); + int (*open)(...); + int (*close)(...); + int (*get_vara)(...); + int (*put_vara)(...); + /* ... many more functions ... */ +} NC_Dispatch; +``` + +**Location**: `include/netcdf_dispatch.h` + +### Minimal Example + +```c +#include "netcdf_dispatch.h" + +static NC_Dispatch my_dispatcher = { + NC_FORMATX_UDF0, /* Use UDF slot 0 */ + NC_DISPATCH_VERSION, /* Current ABI version */ + + NC_RO_create, /* Read-only: use predefined function */ + my_open, /* Custom open function */ + NC_RO_redef, + NC_RO__enddef, + NC_RO_sync, + my_abort, + my_close, + NC_RO_set_fill, + my_inq_format, + my_inq_format_extended, + + /* Inquiry functions - can use NC4_* defaults */ + NC4_inq, + NC4_inq_type, + NC4_inq_dimid, + NC4_inq_varid, + NC4_inq_unlimdim, + NC4_inq_grpname, + NC4_inq_grpname_full, + NC4_inq_grp_parent, + NC4_inq_grp_full_ncid, + NC4_inq_varids, + NC4_inq_dimids, + NC4_inq_typeids, + NC4_inq_type_equal, + NC4_inq_user_type, + NC4_inq_typeid, + + /* Variable I/O */ + my_get_vara, + NC_RO_put_vara, /* Read-only */ + NCDEFAULT_get_vars, /* Use default strided implementation */ + NC_RO_put_vars, + NCDEFAULT_get_varm, /* Use default mapped implementation */ + NC_RO_put_varm, + + /* Attributes */ + NC4_get_att, + NC_RO_put_att, + + /* Dimensions */ + NC4_inq_dim, + NC_RO_def_dim, + NC4_inq_unlimdims, + NC_RO_rename_dim, + + /* Variables */ + NC4_inq_var_all, + 
NC_RO_def_var, + NC_RO_rename_var, + NC4_var_par_access, + NC_RO_def_var_fill, + + /* NetCDF-4 features not supported */ + NC_NOTNC4_show_metadata, + NC_NOTNC4_inq_grps, + NC_NOTNC4_inq_ncid, + NC_NOTNC4_inq_format_extended, + NC_NOTNC4_inq_var_all, + NC_NOTNC4_def_grp, + NC_NOTNC4_rename_grp, + NC_NOTNC4_inq_user_type, + NC_NOTNC4_def_compound, + NC_NOTNC4_insert_compound, + NC_NOTNC4_insert_array_compound, + NC_NOTNC4_inq_compound_field, + NC_NOTNC4_inq_compound_fieldindex, + NC_NOTNC4_def_vlen, + NC_NOTNC4_def_enum, + NC_NOTNC4_def_opaque, + NC_NOTNC4_def_var_deflate, + NC_NOTNC4_def_var_fletcher32, + NC_NOTNC4_def_var_chunking, + NC_NOTNC4_def_var_endian, + NC_NOTNC4_def_var_filter, + NC_NOTNC4_set_var_chunk_cache, + NC_NOTNC4_get_var_chunk_cache, + NC_NOTNC4_inq_var_filter_ids, + NC_NOTNC4_inq_var_filter_info, + NC_NOTNC4_def_var_quantize, + NC_NOTNC4_inq_var_quantize, +}; +``` + +## Pre-defined Functions + +Use these for operations your format doesn't support: + +### Read-Only Stubs + +**File**: `libdispatch/dreadonly.c` + +Returns `NC_EPERM` (operation not permitted): + +- `NC_RO_create` - File creation +- `NC_RO_redef` - Enter define mode +- `NC_RO__enddef` - Leave define mode +- `NC_RO_sync` - Synchronize to disk +- `NC_RO_set_fill` - Set fill mode +- `NC_RO_def_dim` - Define dimension +- `NC_RO_def_var` - Define variable +- `NC_RO_rename_dim` - Rename dimension +- `NC_RO_rename_var` - Rename variable +- `NC_RO_put_att` - Write attribute +- `NC_RO_del_att` - Delete attribute +- `NC_RO_put_vara` - Write variable data +- `NC_RO_put_vars` - Write strided data +- `NC_RO_put_varm` - Write mapped data +- `NC_RO_def_var_fill` - Define fill value + +### Not NetCDF-4 Stubs + +**File**: `libdispatch/dnotnc4.c` + +Returns `NC_ENOTNC4` (not a NetCDF-4 file): + +- `NC_NOTNC4_def_grp` - Define group +- `NC_NOTNC4_rename_grp` - Rename group +- `NC_NOTNC4_def_compound` - Define compound type +- `NC_NOTNC4_def_vlen` - Define variable-length type +- `NC_NOTNC4_def_enum` - 
Define enumeration type +- `NC_NOTNC4_def_opaque` - Define opaque type +- `NC_NOTNC4_def_var_deflate` - Define compression +- `NC_NOTNC4_def_var_fletcher32` - Define checksums +- `NC_NOTNC4_def_var_chunking` - Define chunking +- `NC_NOTNC4_def_var_endian` - Define endianness +- `NC_NOTNC4_def_var_filter` - Define filter +- `NC_NOTNC4_def_var_quantize` - Define quantization + +### Default Implementations + +**File**: `libdispatch/dvar.c` + +Generic implementations built on simpler operations: + +- `NCDEFAULT_get_vars` - Strided read using `get_vara` +- `NCDEFAULT_put_vars` - Strided write using `put_vara` +- `NCDEFAULT_get_varm` - Mapped read using `get_vars` +- `NCDEFAULT_put_varm` - Mapped write using `put_vars` + +### NetCDF-4 Inquiry Functions + +**Files**: `libsrc4/*.c` + +Use internal metadata model for inquiry operations: + +- `NC4_inq` - Inquire about file +- `NC4_inq_type` - Inquire about type +- `NC4_inq_dimid` - Get dimension ID +- `NC4_inq_varid` - Get variable ID +- `NC4_inq_unlimdim` - Get unlimited dimension +- `NC4_inq_grpname` - Get group name +- `NC4_inq_varids` - Get all variable IDs +- `NC4_inq_dimids` - Get all dimension IDs +- `NC4_get_att` - Get attribute value +- `NC4_inq_var_all` - Get all variable info + +## Initialization Function + +### Function Signature + +```c +int plugin_init(void); +``` + +### Requirements + +1. Must be exported (not static) +2. Must call `nc_def_user_format()` to register dispatch table +3. Should return `NC_NOERR` on success, error code on failure +4. Name must match RC file INIT key + +### Example Implementation + +```c +#include + +/* Your dispatch table */ +extern NC_Dispatch my_dispatcher; + +/* Initialization function - must be exported */ +int my_plugin_init(void) +{ + int ret; + + /* Register dispatch table with magic number */ + ret = nc_def_user_format(NC_UDF0 | NC_NETCDF4, + &my_dispatcher, + "MYFMT"); + if (ret != NC_NOERR) + return ret; + + /* Additional initialization if needed */ + /* ... 
*/ + + return NC_NOERR; +} +``` + +## Implementing Key Dispatch Functions + +### Open Function + +```c +int my_open(const char *path, int mode, int basepe, size_t *chunksizehintp, + void *parameters, const NC_Dispatch *dispatch, int ncid) +{ + /* 1. Open your file format */ + /* 2. Populate internal metadata structures */ + /* 3. Store format-specific data in NC->dispatchdata */ + /* 4. Return NC_NOERR on success */ + + return NC_NOERR; +} +``` + +### Close Function + +```c +int my_close(int ncid, void *v) +{ + /* 1. Clean up resources */ + /* 2. Close file handles */ + /* 3. Free format-specific data */ + + return NC_NOERR; +} +``` + +### Abort Function + +```c +int my_abort(int ncid, void *v) +{ + /* 1. Discard any pending changes */ + /* 2. Clean up resources */ + /* 3. Close file handles */ + + return NC_NOERR; +} +``` + +### Format Inquiry Functions + +```c +int my_inq_format(int ncid, int *formatp) +{ + if (formatp) + *formatp = NC_FORMAT_NETCDF4; /* Or appropriate format */ + return NC_NOERR; +} + +int my_inq_format_extended(int ncid, int *formatp, int *modep) +{ + if (formatp) + *formatp = NC_FORMATX_UDF0; + if (modep) + *modep = NC_UDF0 | NC_NETCDF4; + return NC_NOERR; +} +``` + +### Variable I/O Functions + +```c +int my_get_vara(int ncid, int varid, const size_t *start, + const size_t *count, void *value, nc_type memtype) +{ + /* 1. Validate parameters */ + /* 2. Read data from your format */ + /* 3. Convert to requested memory type if needed */ + /* 4. 
Copy to value buffer */ + + return NC_NOERR; +} +``` + +## Building Plugins + +### Unix/Linux/macOS + +**Makefile**: + +```makefile +CC = gcc +CFLAGS = -fPIC -I/usr/local/include +LDFLAGS = -shared -L/usr/local/lib -lnetcdf + +myplugin.so: myplugin.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +install: + cp myplugin.so /usr/local/lib/ +``` + +**Command line**: + +```bash +gcc -shared -fPIC -I/usr/local/include -o myplugin.so myplugin.c -lnetcdf +``` + +### Windows + +**Command line**: + +```batch +cl /LD /I"C:\netcdf\include" myplugin.c /link /LIBPATH:"C:\netcdf\lib" netcdf.lib +``` + +### CMake + +```cmake +cmake_minimum_required(VERSION 3.10) +project(MyPlugin) + +find_package(netCDF REQUIRED) + +add_library(myplugin SHARED myplugin.c) +target_link_libraries(myplugin netCDF::netcdf) +target_include_directories(myplugin PRIVATE ${netCDF_INCLUDE_DIRS}) + +install(TARGETS myplugin LIBRARY DESTINATION lib) +``` + +## Testing Plugins + +### Unit Testing + +```c +/* test_plugin.c */ +#include +#include + +extern NC_Dispatch my_dispatcher; +extern int my_plugin_init(void); + +int main() { + int ret; + NC_Dispatch *disp; + + /* Test initialization */ + ret = my_plugin_init(); + assert(ret == NC_NOERR); + + /* Verify registration */ + ret = nc_inq_user_format(NC_UDF0, &disp, NULL); + assert(ret == NC_NOERR); + assert(disp == &my_dispatcher); + + printf("Plugin tests passed\n"); + return 0; +} +``` + +### Integration Testing + +```c +/* test_integration.c */ +#include + +int main() { + int ncid, ret; + + /* Initialize and register plugin */ + my_plugin_init(); + + /* Test file operations */ + ret = nc_open("testfile.dat", NC_UDF0, &ncid); + if (ret != NC_NOERR) { + fprintf(stderr, "Open failed: %s\n", nc_strerror(ret)); + return 1; + } + + /* Test operations */ + int format; + nc_inq_format(ncid, &format); + + nc_close(ncid); + printf("Integration test passed\n"); + return 0; +} +``` + +### RC File Testing + +Create `.ncrc`: + +```ini +NETCDF.UDF0.LIBRARY=/path/to/myplugin.so 
+NETCDF.UDF0.INIT=my_plugin_init +NETCDF.UDF0.MAGIC=MYFMT +``` + +Test automatic loading: + +```c +int main() { + /* Plugin loads automatically during nc_initialize() */ + int ncid; + nc_open("file_with_magic.dat", 0, &ncid); /* Auto-detects format */ + nc_close(ncid); + return 0; +} +``` + +## Debugging + +### Enable NetCDF Logging + +```bash +export NC_LOG_LEVEL=3 +./test_program +``` + +### Check Symbol Exports + +**Unix**: + +```bash +nm -D libmyplugin.so | grep init +``` + +**Windows**: + +```batch +dumpbin /EXPORTS myplugin.dll +``` + +### GDB Debugging + +```bash +gdb ./test_program +(gdb) break my_plugin_init +(gdb) run +(gdb) backtrace +``` + +### Common Issues + +**Plugin not loaded**: + +- Check RC file syntax +- Verify both LIBRARY and INIT are present +- Use absolute path for LIBRARY + +**Init function not found**: + +- Ensure function is not static +- Check function name matches INIT key +- Verify symbol is exported + +**ABI version mismatch**: + +- Recompile against current netCDF-C headers +- Check `NC_DISPATCH_VERSION` value + +## Best Practices + +1. **Error Handling**: Return appropriate `NC_E*` error codes +2. **Memory Management**: Clean up in close/abort functions +3. **Thread Safety**: Use thread-safe operations if needed +4. **Logging**: Use `nclog` functions for diagnostic output +5. **Documentation**: Document your format and API +6. **Testing**: Test all code paths thoroughly +7. **Versioning**: Version your plugin and document compatibility + +## Magic Numbers + +### How They Work + +When `nc_open()` is called without a specific format flag: + +1. File's first bytes are read +2. Compared against all registered magic numbers +3. 
If match found, corresponding UDF dispatcher is used + +### Best Practices + +- Use unique, distinctive strings (4-8 bytes recommended) +- Place at beginning of file format +- Avoid conflicts with existing formats: + - NetCDF-3: "CDF\001", "CDF\002", "CDF\005" + - HDF5/NetCDF-4: "\211HDF\r\n\032\n" +- Maximum length: `NC_MAX_MAGIC_NUMBER_LEN` bytes + +### Example + +```c +/* File format with magic number */ +FILE *fp = fopen("mydata.dat", "wb"); +fwrite("MYDATA", 1, 6, fp); /* Magic number */ +/* ... write your data ... */ +fclose(fp); + +/* Register UDF with magic number */ +nc_def_user_format(NC_UDF0 | NC_NETCDF4, &my_dispatcher, "MYDATA"); + +/* Open automatically detects format */ +int ncid; +nc_open("mydata.dat", 0, &ncid); /* No mode flag needed! */ +``` + +## Reference Files + +- **Dispatch table definition**: `include/netcdf_dispatch.h` +- **Pre-defined functions**: `libdispatch/dreadonly.c`, `libdispatch/dnotnc*.c` +- **Example implementations**: `libhdf5/hdf5dispatch.c`, `libsrc/nc3dispatch.c` +- **Test plugins**: `nc_test4/test_plugin_lib.c` +- **Plugin loading**: `libdispatch/dutil.c`, `libdispatch/drc.c` +- **Registration API**: `libdispatch/ddispatch.c` diff --git a/windsurf-harnett/skills/netcdf-java/skill.md b/windsurf-harnett/skills/netcdf-java/skill.md new file mode 100644 index 000000000..9d678a928 --- /dev/null +++ b/windsurf-harnett/skills/netcdf-java/skill.md @@ -0,0 +1,331 @@ +# NetCDF-Java Library + +## Overview + +The netCDF-Java library is a 100% Java framework for reading and writing scientific data formats. It implements the Common Data Model (CDM), which is an abstract data model that merges netCDF, OPeNDAP, and HDF5 data models to create a unified API for accessing many types of scientific data. 
+ +**Key Capabilities:** + +- Read netCDF-3, netCDF-4, HDF5, GRIB, BUFR, and many other scientific data formats +- Write netCDF-3 files natively +- Write netCDF-4 files via JNI to netCDF-C library +- Access remote datasets via OPeNDAP and other protocols +- Support for NcML (NetCDF Markup Language) for metadata manipulation and aggregation +- Coordinate system identification and georeferencing +- Scientific feature type support (grids, point data, radial data, etc.) + +## Documentation and Resources + +- **GitHub Repository:** https://github.com/Unidata/netcdf-java +- **Main Documentation:** https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ +- **API Reference:** Available through Maven artifacts +- **License:** BSD-3 (as of version 5.0) +- **Maven Repository:** https://artifacts.unidata.ucar.edu/ + +## Common Data Model (CDM) Architecture + +The CDM has three layers that build on each other: + +### 1. Data Access Layer (Syntactic Layer) + +Handles data reading and writing through: + +- **NetcdfFile:** Read-only access to datasets +- **NetcdfFiles:** Static methods for opening files +- **IOServiceProvider:** Interface for format-specific implementations +- **Variable, Dimension, Attribute, Group, Structure:** Metadata objects + +### 2. Coordinate System Layer + +Identifies coordinates of data arrays: + +- General coordinate concepts for scientific data +- Specialized georeferencing coordinate systems for Earth Science +- CoordinateAxis and CoordinateSystem objects + +### 3. 
Scientific Feature Types Layer + +Specialized methods for specific data types: + +- Grids +- Point data +- Radial data (radar, lidar) +- Station data +- Trajectory data + +## Basic Usage Patterns + +### Opening and Reading Files + +```java +// Open a NetCDF file +try (NetcdfFile ncfile = NetcdfFiles.open(pathToFile)) { + // File is automatically closed when try block exits + + // Find a variable by name + Variable v = ncfile.findVariable("temperature"); + if (v == null) { + System.err.println("Variable not found"); + return; + } + + // Read all data from the variable + Array data = v.read(); + + // Read a subset using section specification + // Format: "dim1_start:dim1_end:dim1_stride, dim2_start:dim2_end, ..." + Array subset = v.read("0:10:2, :, 5"); + +} catch (IOException e) { + e.printStackTrace(); +} +``` + +### Working with Metadata + +```java +try (NetcdfFile ncfile = NetcdfFiles.open(pathToFile)) { + // List all variables + for (Variable var : ncfile.getVariables()) { + System.out.println("Variable: " + var.getFullName()); + System.out.println(" Type: " + var.getDataType()); + System.out.println(" Shape: " + Arrays.toString(var.getShape())); + + // Get attributes + for (Attribute attr : var.attributes()) { + System.out.println(" Attribute: " + attr.getFullName() + " = " + attr.getValue()); + } + } + + // List dimensions + for (Dimension dim : ncfile.getDimensions()) { + System.out.println("Dimension: " + dim.getFullName() + " = " + dim.getLength()); + } + + // Get global attributes + for (Attribute attr : ncfile.getGlobalAttributes()) { + System.out.println("Global: " + attr.getFullName() + " = " + attr.getValue()); + } + +} catch (IOException e) { + e.printStackTrace(); +} +``` + +### Reading Data Arrays + +```java +// Read scalar data +Variable scalarVar = ncfile.findVariable("scalar_value"); +double scalarValue = scalarVar.readScalarDouble(); + +// Read 1D array +Variable var1d = ncfile.findVariable("time"); +Array timeData = var1d.read(); +int[] shape = 
timeData.getShape(); +for (int i = 0; i < shape[0]; i++) { + double value = timeData.getDouble(i); + System.out.println("time[" + i + "] = " + value); +} + +// Read multidimensional array +Variable var3d = ncfile.findVariable("temperature"); +Array tempData = var3d.read(); +Index index = tempData.getIndex(); +int[] shape3d = tempData.getShape(); +for (int t = 0; t < shape3d[0]; t++) { + for (int y = 0; y < shape3d[1]; y++) { + for (int x = 0; x < shape3d[2]; x++) { + double value = tempData.getDouble(index.set(t, y, x)); + } + } +} +``` + +### Array Section Syntax + +NetCDF-Java uses Fortran 90 array section syntax with zero-based indexing: + +- `":"` - all elements in dimension +- `"start:end"` - elements from start to end (inclusive) +- `"start:end:stride"` - elements with stride +- Example: `"0:10:2, :, 5"` means first dimension 0-10 with stride 2, all of second dimension, element 5 of third dimension + +## NetCDF Markup Language (NcML) + +NcML is an XML representation of netCDF metadata that can: + +- Describe netCDF file structure (similar to CDL) +- Modify existing datasets (add/change attributes, variables) +- Create virtual datasets through aggregation +- Define coordinate systems + +### Basic NcML Example + +```xml + + + + + + + + + +``` + +### NcML Aggregation + +NcML supports several aggregation types: + +- **joinExisting:** Concatenate along existing dimension +- **joinNew:** Create new dimension for aggregation +- **union:** Combine variables from multiple files +- **tiled:** Aggregate multidimensional tiles + +```xml + + + + + +``` + +## Advanced Features + +### Opening Remote Files + +```java +// OPeNDAP URL +NetcdfFile ncfile = NetcdfFiles.open("https://server.org/dods/dataset"); + +// HTTP Server +NetcdfFile ncfile = NetcdfFiles.open("https://server.org/data/file.nc"); + +// AWS S3 +NetcdfFile ncfile = NetcdfFiles.open("cdms3://bucket-name/path/to/file.nc"); +``` + +### Using NetcdfDataset for Enhanced Features + +```java +// NetcdfDataset provides 
coordinate system support and NcML processing +try (NetcdfDataset ncd = NetcdfDatasets.openDataset(pathToFile)) { + // Access coordinate systems + for (CoordinateSystem cs : ncd.getCoordinateSystems()) { + System.out.println("Coordinate System: " + cs.getName()); + for (CoordinateAxis axis : cs.getCoordinateAxes()) { + System.out.println(" Axis: " + axis.getFullName()); + } + } +} +``` + +### Disk Caching + +NetCDF-Java automatically handles compressed files (.Z, .zip, .gzip, .gz, .bz2) by uncompressing them to a disk cache before opening. + +## File Format Support + +The library can read many formats through IOServiceProvider implementations: + +- NetCDF-3 (classic and 64-bit offset) +- NetCDF-4 (HDF5-based) +- HDF4 and HDF5 +- GRIB (GRIB-1 and GRIB-2) +- BUFR +- NEXRAD Level 2 and Level 3 +- OPeNDAP (DAP2 and DAP4) +- Many others + +**Note:** Some formats require optional modules to be included as Maven/Gradle artifacts. + +## Maven/Gradle Integration + +### Maven Example + +```xml + + edu.ucar + cdm-core + 5.5.3 + + + + + edu.ucar + netcdfAll + 5.5.3 + +``` + +### Gradle Example + +```gradle +dependencies { + implementation 'edu.ucar:cdm-core:5.5.3' + // or for all formats + implementation 'edu.ucar:netcdfAll:5.5.3' +} +``` + +## ToolsUI Application + +ToolsUI is a graphical application for browsing and debugging NetCDF files: + +- Download: `toolsUI.jar` from netCDF-Java downloads page +- Run: `java -Xmx1g -jar toolsUI.jar` +- Features: Browse metadata, view data, test coordinate systems, debug IOSPs + +## Best Practices + +1. **Always use try-with-resources** to ensure files are properly closed +2. **Read metadata first** - structural metadata is loaded at open time, data is lazy-loaded +3. **Use section specifications** to read subsets of large arrays +4. **Check for null** when finding variables or attributes +5. **Use NetcdfDataset** when you need coordinate system support +6. **Cache remote files** for better performance with repeated access +7. 
**Use appropriate data types** - Array provides type-specific getters (getDouble, getFloat, etc.) + +## Common Pitfalls + +- **Zero-based indexing:** Unlike Fortran, Java uses zero-based array indexing +- **Write limitations:** Native Java can only write netCDF-3; netCDF-4 requires C library via JNI +- **Module dependencies:** Some file formats require additional Maven artifacts +- **Memory management:** Large arrays can consume significant memory; use sections when possible +- **Thread safety:** NetcdfFile objects are not thread-safe; use one per thread or synchronize access + +## Integration with THREDDS + +The THREDDS Data Server (TDS) is built on top of netCDF-Java and provides: + +- Remote data access via OPeNDAP, WCS, WMS, HTTP +- Catalog services for dataset discovery +- Aggregation and virtual dataset support +- Metadata services + +## Version History + +- **Version 5.x:** Decoupled from TDS, BSD-3 license, modular architecture +- **Version 4.6 and earlier:** Combined with TDS in single repository +- **Current target:** Java 8 (community feedback being gathered for future versions) + +## When to Use NetCDF-Java + +Use netCDF-Java when you need to: + +- Read scientific data in Java applications +- Support multiple file formats with a single API +- Work with remote datasets (OPeNDAP, HTTP, S3) +- Manipulate metadata without rewriting files (NcML) +- Aggregate multiple files into virtual datasets +- Access coordinate system information +- Build web services for scientific data (with THREDDS) + +## Related Technologies + +- **netCDF-C:** C library for netCDF, can be called via JNI for netCDF-4 writing +- **THREDDS Data Server:** Web server built on netCDF-Java +- **OPeNDAP:** Protocol for remote data access +- **CF Conventions:** Metadata conventions for climate and forecast data +- **NcML:** XML language for netCDF metadata and aggregation diff --git a/windsurf-harnett/skills/opendap/README.md b/windsurf-harnett/skills/opendap/README.md new file mode 100644 
index 000000000..8bd378fe1 --- /dev/null +++ b/windsurf-harnett/skills/opendap/README.md @@ -0,0 +1,35 @@ +# OPeNDAP Skill + +This Windsurf skill provides comprehensive knowledge of OPeNDAP (Open-source Project for a Network Data Access Protocol) for accessing and serving scientific data over the internet. + +## What This Skill Provides + +- **OPeNDAP architecture** and client/server model +- **Data Access Protocol (DAP)** versions 2 and 4 +- **Constraint expressions** for subsetting remote data +- **Data model** and type system +- **URL construction** and service endpoints +- **Client integration** with NetCDF and other tools + +## Files + +- **SKILL.md** - Main skill file with OPeNDAP overview and quick reference +- **references/PROTOCOL.md** - DAP2 and DAP4 protocol details +- **references/CONSTRAINTS.md** - Constraint expression syntax and examples +- **references/DATA-MODEL.md** - OPeNDAP data types and structures +- **references/CLIENT-USAGE.md** - Using OPeNDAP with various clients + +## When to Use + +Use this skill when: + +- Accessing remote scientific data via OPeNDAP URLs +- Writing programs that use OPeNDAP services +- Constructing constraint expressions for data subsetting +- Understanding DAP responses (DDS, DAS, DMR) +- Integrating OPeNDAP with NetCDF applications +- Debugging OPeNDAP client/server issues + +## Version + +Current version: 1.0 (January 19, 2026) diff --git a/windsurf-harnett/skills/opendap/SKILL.md b/windsurf-harnett/skills/opendap/SKILL.md new file mode 100644 index 000000000..a26275580 --- /dev/null +++ b/windsurf-harnett/skills/opendap/SKILL.md @@ -0,0 +1,406 @@ +--- +name: opendap +description: Understanding OPeNDAP (Open-source Project for a Network Data Access Protocol) for accessing remote scientific data via HTTP, including DAP2/DAP4 protocols, constraint expressions, data models, and client integration. Use when working with OPeNDAP URLs, writing data access code, or integrating with NetCDF. 
+metadata: + author: opendap-documentation + version: '1.0' + date: '2026-01-19' +--- + +# OPeNDAP Skill + +This skill provides comprehensive knowledge of OPeNDAP to help you access, serve, and work with remote scientific data effectively. + +## Overview + +OPeNDAP (Open-source Project for a Network Data Access Protocol) provides a way for researchers to access scientific data anywhere on the Internet from a wide variety of programs. It uses a client/server architecture built on HTTP and provides flexible data subsetting through constraint expressions. + +**Key Features**: + +- Network-transparent data access via URLs +- Data subsetting at the server (reduces bandwidth) +- Format-independent data model +- Compatible with NetCDF, HDF5, and other formats +- Supports gridded data, sequences, and complex structures + +## Core Concepts + +### 1. Client/Server Architecture + +OPeNDAP uses a web-based client/server model similar to the World Wide Web: + +- **Server (Hyrax)**: Translates data from storage format to DAP format for transmission +- **Client**: Requests data via URLs and translates DAP format to local API format +- **Protocol**: HTTP-based Data Access Protocol (DAP) + +**Data Flow**: + +``` +User Program → OPeNDAP Client Library → HTTP Request → OPeNDAP Server + ↓ +User Program ← Translated Data ← DAP Response ← Read Local Files +``` + +### 2. 
OPeNDAP URLs + +An OPeNDAP URL identifies a dataset and optionally includes a constraint expression: + +**Basic URL Structure**: + +``` +http://server.domain/path/to/dataset.nc +``` + +**URL with Constraint Expression**: + +``` +http://server.domain/path/to/dataset.nc?variable[start:stop]&selection_clause +``` + +**URL Suffixes** (Service Endpoints): + +- `.dds` - Dataset Descriptor Structure (DAP2 - data shape) +- `.das` - Data Attribute Structure (DAP2 - metadata) +- `.dmr.xml` - Dataset Metadata Response (DAP4 - combined structure) +- `.dods` - Binary data (DAP2) +- `.dap` - Binary data (DAP4) +- `.ascii` - ASCII representation of data +- `.html` - Web form interface +- `.info` - Combined DDS + DAS in HTML + +### 3. Data Access Protocol (DAP) + +**DAP2** (Older, widely supported): + +- Separate DDS and DAS responses +- Binary data in .dods format +- Simpler data model + +**DAP4** (Newer, enhanced): + +- Unified DMR (Dataset Metadata Response) in XML +- Enhanced data model with groups +- Better support for complex types +- Improved performance + +## OPeNDAP Data Model + +### Base Types + +- **Byte, Int16, Int32, Int64** - Integer types +- **UInt16, UInt32, UInt64** - Unsigned integers +- **Float32, Float64** - Floating-point numbers +- **String** - Character strings +- **URL** - Uniform Resource Locators + +### Constructor Types + +- **Array** - Multi-dimensional arrays with indexing +- **Structure** - Collection of related variables +- **Sequence** - Ordered collection of instances (like database rows) +- **Grid** - Array with coordinate map vectors + +**Example Grid**: + +``` +Grid { + Array: + Int16 sst[time=1857][lat=89][lon=180]; + Maps: + Float64 time[time=1857]; + Float64 lat[lat=89]; + Float64 lon[lon=180]; +} sst; +``` + +## Constraint Expressions + +Constraint expressions allow you to subset data on the server before transmission. 
+ +### Syntax + +``` +URL?projection&selection +``` + +- **Projection**: Comma-separated list of variables to return +- **Selection**: Boolean expressions to filter data (prefixed with &) + +### Array Subsetting + +**Single element**: + +``` +?variable[index] +?sst[0][10][20] +``` + +**Range (start:stop)**: + +``` +?variable[start:stop] +?sst[0:10][20:30][40:50] +``` + +**Stride (start:stride:stop)**: + +``` +?variable[start:stride:stop] +?sst[0:2:100] # Every 2nd element from 0 to 100 +``` + +### Selection Clauses + +**Comparison operators**: `<`, `>`, `<=`, `>=`, `=`, `!=` + +**Examples**: + +``` +?station&station.temp>20.0 +?station&station.lat>0.0&station.lon<-60.0 +?station&station.month={4,5,6,7} # List for OR +``` + +### Server Functions + +Hyrax servers support functions for advanced operations: + +**geogrid()** - Subset by geographic coordinates: + +``` +?geogrid(sst, 62, 206, 56, 210, "19722<time<19755") +``` + +## Using OPeNDAP with NetCDF + +### C Programs + +```c +#include <netcdf.h> + +int ncid, varid; +char *url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz"; + +// Open remote dataset +nc_open(url, NC_NOWRITE, &ncid); + +// Access variables normally +nc_inq_varid(ncid, "sst", &varid); + +// Read data with subsetting +size_t start[] = {0, 0, 0}; +size_t count[] = {1, 10, 10}; +float data[10][10]; +nc_get_vara_float(ncid, varid, start, count, &data[0][0]); + +nc_close(ncid); +``` + +### URL with Constraint Expression + +You can include constraint expressions in the URL: + +```c +char *url = "http://server.org/data.nc?sst[0:10][20:30][40:50]"; +nc_open(url, NC_NOWRITE, &ncid); +``` + +### Fortran Programs + +```fortran +program read_opendap + use netcdf + implicit none + + integer :: ncid, varid, status + character(len=256) :: url + real :: data(10, 10) + + url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" + + status = nf90_open(url, NF90_NOWRITE, ncid) + status = nf90_inq_varid(ncid, "sst", varid) + status = nf90_get_var(ncid, varid, data, & + start=[1,1,1], count=[10,10,1]) + status = nf90_close(ncid) + +end program 
read_opendap +``` + +## Common Workflows + +### 1. Exploring a Dataset + +**Step 1**: Get the DMR/DDS to see structure: + +``` +http://server.org/data.nc.dmr.xml +http://server.org/data.nc.dds (DAP2) +``` + +**Step 2**: Get attributes: + +``` +http://server.org/data.nc.das (DAP2) +``` + +**Step 3**: Use .info for combined view: + +``` +http://server.org/data.nc.info +``` + +### 2. Subsetting Data + +**Step 1**: Identify variable and dimensions from DMR/DDS + +**Step 2**: Construct constraint expression: + +``` +?variable[time_start:time_end][lat_start:lat_end][lon_start:lon_end] +``` + +**Step 3**: Test with .ascii to verify: + +``` +http://server.org/data.nc.ascii?sst[0:1][10:20][30:40] +``` + +**Step 4**: Use in your program with full URL + +### 3. Working with Sequences + +Sequences are like database tables with rows of data: + +**Get specific fields**: + +``` +?sequence.field1,sequence.field2 +``` + +**Filter rows**: + +``` +?sequence.field1,sequence.field2&sequence.field1>100 +``` + +**Multiple conditions**: + +``` +?sequence&sequence.temp>20&sequence.depth<100 +``` + +## Client Tools + +### Matlab + +Matlab 2012a+ has built-in OPeNDAP support via NetCDF interface: + +```matlab +url = 'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz'; +ncid = netcdf.open(url); +data = netcdf.getVar(ncid, varid); +netcdf.close(ncid); +``` + +### Python + +Using netCDF4-python or xarray: + +```python +import netCDF4 +url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" +ds = netCDF4.Dataset(url) +sst = ds.variables['sst'][0:10, 20:30, 40:50] +``` + +### Command-line Tools + +**ncdump** (with OPeNDAP-enabled NetCDF): + +```bash +ncdump -h "http://server.org/data.nc" +ncdump -v sst "http://server.org/data.nc?sst[0:10][20:30]" +``` + +## Troubleshooting + +### Common Issues + +**1. URL not recognized**: + +- Ensure NetCDF library is compiled with DAP support +- Check URL syntax (http:// or https://) + +**2. 
Constraint expression errors**: + +- Verify variable names match DDS/DMR exactly +- Check array bounds (0-indexed) +- Ensure proper quoting in shell commands + +**3. Performance issues**: + +- Use constraint expressions to reduce data transfer +- Request only needed variables +- Consider server-side functions for processing + +**4. Authentication**: + +- Some servers require credentials +- Use .netrc file or URL-embedded credentials +- Check server documentation for auth methods + +## Quick Reference + +### Essential URL Patterns + +``` +# Get metadata +http://server/dataset.nc.dmr.xml (DAP4) +http://server/dataset.nc.dds (DAP2) +http://server/dataset.nc.das (DAP2) + +# Get data +http://server/dataset.nc?var[0:10] +http://server/dataset.nc.ascii?var[0:10] + +# Multiple variables +http://server/dataset.nc?var1,var2,var3 + +# With selection +http://server/dataset.nc?var&var>100 +``` + +### Constraint Expression Operators + +- Array: `[start:stop]`, `[start:stride:stop]` +- Comparison: `<`, `>`, `<=`, `>=`, `=`, `!=` +- String match: `~=` (regex) +- Lists: `{val1,val2,val3}` (OR operation) +- Structure fields: `structure.field` + +## Additional Resources + +See [references/PROTOCOL.md](references/PROTOCOL.md) for DAP2 and DAP4 protocol details. + +See [references/CONSTRAINTS.md](references/CONSTRAINTS.md) for comprehensive constraint expression examples. + +See [references/DATA-MODEL.md](references/DATA-MODEL.md) for complete data type documentation. + +See [references/CLIENT-USAGE.md](references/CLIENT-USAGE.md) for client integration examples. diff --git a/windsurf-harnett/skills/opendap/references/CLIENT-USAGE.md b/windsurf-harnett/skills/opendap/references/CLIENT-USAGE.md new file mode 100644 index 000000000..746145afa --- /dev/null +++ b/windsurf-harnett/skills/opendap/references/CLIENT-USAGE.md @@ -0,0 +1,341 @@ +# OPeNDAP Client Usage + +This document provides examples of using OPeNDAP with various client libraries and tools. 
+ +## C Programs with NetCDF + +### Basic Data Access + +```c +#include +#include + +int main() { + int ncid, varid, status; + size_t start[3] = {0, 0, 0}; + size_t count[3] = {1, 10, 10}; + float data[10][10]; + + char *url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz"; + + status = nc_open(url, NC_NOWRITE, &ncid); + if (status != NC_NOERR) { + fprintf(stderr, "Error opening URL: %s\n", nc_strerror(status)); + return 1; + } + + status = nc_inq_varid(ncid, "sst", &varid); + status = nc_get_vara_float(ncid, varid, start, count, &data[0][0]); + + printf("Data at [0][5][5]: %f\n", data[5][5]); + + nc_close(ncid); + return 0; +} +``` + +### Using Constraint Expressions + +```c +char *url = "http://server.org/data.nc?sst[0:10][20:30][40:50]"; +nc_open(url, NC_NOWRITE, &ncid); +``` + +### Reading Metadata + +```c +int ndims, nvars, ngatts, unlimdimid; +char varname[NC_MAX_NAME+1]; + +nc_inq(ncid, &ndims, &nvars, &ngatts, &unlimdimid); + +for (int i = 0; i < nvars; i++) { + nc_inq_varname(ncid, i, varname); + printf("Variable %d: %s\n", i, varname); +} +``` + +## Fortran Programs + +### Basic Example + +```fortran +program read_opendap + use netcdf + implicit none + + integer :: ncid, varid, status + integer :: start(3), count(3) + real :: data(10, 10, 1) + character(len=256) :: url + + url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" + + status = nf90_open(url, NF90_NOWRITE, ncid) + if (status /= NF90_NOERR) then + print *, "Error: ", trim(nf90_strerror(status)) + stop + end if + + status = nf90_inq_varid(ncid, "sst", varid) + + start = [1, 1, 1] + count = [10, 10, 1] + status = nf90_get_var(ncid, varid, data, start=start, count=count) + + print *, "Sample value: ", data(5, 5, 1) + + status = nf90_close(ncid) + +end program read_opendap +``` + +### Time Series Extraction + +```fortran +program time_series + use netcdf + implicit none + + integer :: ncid, varid, status, nt + real, allocatable :: temp(:) + character(len=256) :: url + + url = 
"http://server.org/data.nc?temp[0:1000][45][90]" + + status = nf90_open(url, NF90_NOWRITE, ncid) + status = nf90_inq_varid(ncid, "temp", varid) + status = nf90_inq_dimlen(ncid, 1, nt) + + allocate(temp(nt)) + status = nf90_get_var(ncid, varid, temp) + + print *, "Time series length: ", nt + print *, "Mean: ", sum(temp)/nt + + deallocate(temp) + status = nf90_close(ncid) + +end program time_series +``` + +## Python Examples + +### Using netCDF4-python + +```python +import netCDF4 as nc + +url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" +dataset = nc.Dataset(url) + +print("Variables:", dataset.variables.keys()) + +sst = dataset.variables['sst'] +print("Shape:", sst.shape) +print("Units:", sst.units) + +data = sst[0:10, 20:30, 40:50] +print("Subset shape:", data.shape) + +dataset.close() +``` + +### Using xarray + +```python +import xarray as xr + +url = "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" +ds = xr.open_dataset(url) + +print(ds) + +sst_subset = ds.sst.isel(time=slice(0, 10), lat=slice(20, 30), lon=slice(40, 50)) +mean_sst = sst_subset.mean() + +print(f"Mean SST: {mean_sst.values}") +``` + +### With Constraint Expressions + +```python +url_with_constraint = "http://server.org/data.nc?sst[0:100][0:50][0:80]" +ds = nc.Dataset(url_with_constraint) +``` + +## Matlab Examples + +### Basic Access + +```matlab +url = 'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz'; +ncid = netcdf.open(url); + +[numdims, numvars, numglobalatts, unlimdimid] = netcdf.inq(ncid); +fprintf('Number of variables: %d\n', numvars); + +varid = netcdf.inqVarID(ncid, 'sst'); +data = netcdf.getVar(ncid, varid, [0,0,0], [10,10,1]); + +fprintf('Sample value: %f\n', data(5,5)); + +netcdf.close(ncid); +``` + +### Subsetting + +```matlab +url = 'http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz'; +ncid = netcdf.open(url); +varid = netcdf.inqVarID(ncid, 'sst'); + +start = [2, 7, 0]; +count = [10, 8, 1]; +stride = [1, 1, 1]; +data = netcdf.getVar(ncid, varid, start, 
count, stride); + +imagesc(data'); +colorbar; +title('SST Subset'); +``` + +## Command-Line Tools + +### ncdump + +```bash +# View header +ncdump -h "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz" + +# Dump specific variable +ncdump -v sst "http://server.org/data.nc?sst[0:10][20:30]" + +# ASCII output +ncdump "http://server.org/data.nc" > output.txt +``` + +### curl + +```bash +# Get DDS +curl "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.dds" + +# Get DAS +curl "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.das" + +# Get ASCII data +curl "http://test.opendap.org/dap/data/nc/sst.mnmean.nc.gz.ascii?sst[0][0:5][0:5]" +``` + +## Error Handling + +### C Error Handling + +```c +int status; +char *url = "http://server.org/data.nc"; + +status = nc_open(url, NC_NOWRITE, &ncid); +if (status != NC_NOERR) { + fprintf(stderr, "NetCDF error: %s\n", nc_strerror(status)); + if (status == NC_ENOTNC) { + fprintf(stderr, "Not a valid NetCDF/OPeNDAP URL\n"); + } + return 1; +} +``` + +### Python Error Handling + +```python +try: + ds = nc.Dataset(url) + data = ds.variables['sst'][:] +except RuntimeError as e: + print(f"OPeNDAP error: {e}") +except KeyError as e: + print(f"Variable not found: {e}") +finally: + if 'ds' in locals(): + ds.close() +``` + +## Performance Tips + +### Minimize Requests + +```c +// Bad - multiple small requests +for (int i = 0; i < 100; i++) { + nc_get_vara_float(ncid, varid, &i, &one, &value); +} + +// Good - single larger request +nc_get_vara_float(ncid, varid, start, count, data); +``` + +### Use Constraint Expressions + +```python +# Bad - download everything then subset +ds = nc.Dataset("http://server.org/data.nc") +subset = ds.variables['sst'][0:10, 20:30, 40:50] + +# Good - subset at server +url = "http://server.org/data.nc?sst[0:10][20:30][40:50]" +ds = nc.Dataset(url) +subset = ds.variables['sst'][:] +``` + +### Cache Metadata + +```python +# Cache dataset structure +ds = nc.Dataset(url) +var_names = list(ds.variables.keys()) 
+dims = {name: len(ds.dimensions[name]) for name in ds.dimensions} + +# Reuse cached info for multiple accesses +for var_name in var_names: + data = ds.variables[var_name][:] +``` + +## Authentication + +### Using .netrc + +Create `~/.netrc`: + +``` +machine server.org +login username +password mypassword +``` + +Set permissions: + +```bash +chmod 600 ~/.netrc +``` + +### URL-Embedded Credentials + +```c +char *url = "http://username:password@server.org/data.nc"; +``` + +Note: Not recommended for security reasons. + +## Best Practices + +1. **Check metadata first** before requesting data +2. **Use constraint expressions** to minimize data transfer +3. **Handle errors gracefully** with proper error checking +4. **Cache when possible** to reduce repeated requests +5. **Close connections** when done +6. **Test URLs** with .ascii or .dds before using in code +7. **Use appropriate data types** matching server types +8. **Consider time zones** for temporal data +9. **Validate data** after retrieval (check for fill values) +10. **Document URLs** and constraint expressions in code diff --git a/windsurf-harnett/skills/opendap/references/CONSTRAINTS.md b/windsurf-harnett/skills/opendap/references/CONSTRAINTS.md new file mode 100644 index 000000000..202fdc7fa --- /dev/null +++ b/windsurf-harnett/skills/opendap/references/CONSTRAINTS.md @@ -0,0 +1,543 @@ +# OPeNDAP Constraint Expressions + +This document provides comprehensive examples and patterns for OPeNDAP constraint expressions. + +## Constraint Expression Basics + +A constraint expression has two parts: + +``` +URL?projection&selection +``` + +- **Projection**: What data to return (variables, array subsets) +- **Selection**: How to filter the data (boolean conditions) + +Both parts are optional. Either or both can be used. 
+ +## Projection Expressions + +### Selecting Variables + +**Single variable**: + +``` +?temperature +``` + +**Multiple variables**: + +``` +?temperature,salinity,pressure +``` + +**Structure fields**: + +``` +?station.latitude,station.longitude,station.time +``` + +**Nested structures**: + +``` +?cruise.station.cast.temperature +``` + +### Array Subsetting + +**Single element**: + +``` +?sst[0][10][20] +``` + +**Range (start:stop)**: + +``` +?sst[0:10][20:30][40:50] +``` + +**Stride (start:stride:stop)**: + +``` +?sst[0:2:100][0:5:50][0:10:180] +# Every 2nd time, every 5th lat, every 10th lon +``` + +**Open-ended ranges**: + +``` +?sst[10:] # From index 10 to end +?sst[:100] # From start to index 100 +?sst[:] # Entire dimension +``` + +### Grid Subsetting + +When subsetting a Grid, include coordinate variables: + +**Without coordinates** (just the array): + +``` +?sst[0:10][20:30][40:50] +``` + +**With coordinates**: + +``` +?time[0:10],lat[20:30],lon[40:50],sst[0:10][20:30][40:50] +``` + +**Using geogrid() function** (Hyrax servers): + +``` +?geogrid(sst, north_lat, west_lon, south_lat, east_lon) +?geogrid(sst, 62, 206, 56, 210) +``` + +## Selection Expressions + +### Comparison Operators + +**Numeric comparisons**: + +``` +?station&station.temperature>20.0 +?station&station.depth<100 +?station&station.salinity>=34.5 +?station&station.pressure<=1000 +?station&station.id=12345 +?station&station.quality!=0 +``` + +**String comparisons**: + +``` +?station&station.name="Station_A" +?station&station.type!="reference" +``` + +**String pattern matching** (regex): + +``` +?station&station.comment~=".*shark.*" +?station&station.location~="^North.*" +``` + +### Multiple Conditions + +**AND conditions** (multiple & clauses): + +``` +?station&station.lat>0.0&station.lon<-60.0 +?station&station.temp>20&station.depth<50&station.salinity>34 +``` + +**OR conditions** (using lists): + +``` +?station&station.month={4,5,6,7} +?station&station.type={"CTD","XBT","profiler"} +``` + 
+**Combining variables in lists**: + +``` +?station&station.month={4,5,6,station.monsoon_month} +``` + +### Range Conditions + +**Value between bounds**: + +``` +?station&station.temp>15&station.temp<25 +?data&1019722&data.time<19755 +``` + +## Sequence Operations + +Sequences are like database tables with rows of data. + +### Selecting Fields + +**Specific fields**: + +``` +?sequence.field1,sequence.field2,sequence.field3 +``` + +**All fields with filter**: + +``` +?sequence&sequence.temperature>20 +``` + +### Filtering Rows + +**Single condition**: + +``` +?URI_GSO-Dock.Time,URI_GSO-Dock.Sea_Temp&URI_GSO-Dock.Time<35234.1 +``` + +**Multiple conditions**: + +``` +?station.cast.press,station.cast.temp&station.cast.press>500.0 +?station.cast&station.cast.temp>22.0 +``` + +**Complex filters**: + +``` +?station&station.lat>0.0&station.month={4,5,6,7} +``` + +## Server Functions + +### geogrid() - Geographic Subsetting + +**Syntax**: + +``` +geogrid(variable, top, left, bottom, right, [other_expressions]) +``` + +**Example**: + +``` +?geogrid(sst, 62, 206, 56, 210, "1972219722 +``` + +**Multiple variables with conditions**: + +``` +?lat,lon,temp,salinity&temp>20&salinity>34 +``` + +### Working with Nested Structures + +**Nested sequence**: + +``` +?cruise.station.cast.depth,cruise.station.cast.temp +``` + +**Filter on nested field**: + +``` +?cruise.station&cruise.station.latitude>0 +``` + +**Multiple levels**: + +``` +?cruise.station.cast&cruise.station.cast.temp>20 +``` + +### Sampling Patterns + +**Every Nth element**: + +``` +?sst[0:10:1857][0:5:89][0:10:180] +``` + +**Sparse sampling**: + +``` +?sst[::100][::10][::20] # Every 100th time, 10th lat, 20th lon +``` + +**Diagonal sampling** (if supported): + +``` +?array[0:10][0:10] # 11x11 subset +``` + +## Common Use Cases + +### 1. Time Series at a Point + +``` +?time,sst[0:1857][44][90] +# All times, single lat/lon point +``` + +### 2. 
Spatial Subset at One Time + +``` +?lat[20:40],lon[100:140],sst[0][20:40][100:140] +# Single time, regional subset +``` + +### 3. Vertical Profile + +``` +?depth,temperature[0:500],salinity[0:500] +# Full depth profile +``` + +### 4. Quality-Filtered Data + +``` +?time,temp,salinity&quality_flag=1 +# Only high-quality data +``` + +### 5. Regional and Temporal Subset + +``` +?geogrid(sst, 45, -130, 30, -110, "1990025&wind_speed<5 +# Warm, calm conditions +``` + +## Pattern Matching + +### Regular Expression Syntax + +**Wildcards**: + +- `.` - Any single character +- `.*` - Zero or more characters +- `.+` - One or more characters +- `.?` - Zero or one character + +**Anchors**: + +- `^` - Start of string +- `$` - End of string + +**Character classes**: + +- `[abc]` - Match a, b, or c +- `[0-9]` - Match any digit +- `[^0-9]` - Match any non-digit + +**Examples**: + +``` +?station&station.comment~=".*shark.*" # Contains "shark" +?station&station.name~="^Station_[0-9]+$" # Station_123 format +?data&data.type~="CTD|XBT|profiler" # Multiple types +``` + +## Error Handling + +### Common Constraint Expression Errors + +**Invalid variable name**: + +``` +Error: Variable 'sst_temp' not found +Fix: Check DDS/DMR for correct name +``` + +**Index out of bounds**: + +``` +Error: Array index [2000] exceeds dimension size [1857] +Fix: Verify dimension sizes in DDS/DMR +``` + +**Syntax error**: + +``` +Error: Expected ']' but found ',' +Fix: Check bracket matching and syntax +``` + +**Type mismatch**: + +``` +Error: Cannot compare String with Float64 +Fix: Use appropriate operators for data type +``` + +### Testing Constraint Expressions + +**Use .ascii for debugging**: + +``` +http://server/data.nc.ascii?sst[0:1][0:5][0:5] +``` + +**Check metadata first**: + +``` +http://server/data.nc.dmr.xml +http://server/data.nc.dds +``` + +**Test incrementally**: + +1. Start with simple projection: `?variable` +2. Add subsetting: `?variable[0:10]` +3. 
Add selection: `?variable[0:10]&variable>100` + +## Performance Tips + +### Minimize Data Transfer + +**Request only needed variables**: + +``` +?temp,salinity # Not ?* +``` + +**Use appropriate stride**: + +``` +?sst[0:10:1857] # Every 10th instead of all +``` + +**Subset at server**: + +``` +?sst[0:100][20:40][50:80] # Not full array +``` + +### Leverage Server Functions + +**Process at server**: + +``` +?linear_scale(geogrid(sst, 45, -130, 30, -110)) +``` + +**Combine operations**: + +``` +?mean(sst[0:100][20:40][50:80]) # If server supports +``` + +### Cache Metadata + +**Reuse DDS/DAS/DMR**: + +- Cache structure information +- Avoid repeated metadata requests +- Use cached info for constraint construction + +## Examples by Data Type + +### Gridded Data (Arrays/Grids) + +``` +# Single point +?sst[100][44][90] + +# Regional subset +?sst[0:100][20:40][80:120] + +# Time series at point +?time,sst[0:1857][44][90] + +# Spatial map at time +?lat,lon,sst[100][0:89][0:180] +``` + +### Station Data (Sequences) + +``` +# All stations +?station + +# Specific fields +?station.id,station.lat,station.lon,station.time + +# Filtered stations +?station&station.lat>0&station.lon<-60 + +# Quality filtered +?station.temp,station.salinity&station.quality=1 +``` + +### Profile Data (Nested Sequences) + +``` +# All profiles +?cruise.station.cast + +# Specific depths +?cruise.station.cast.depth,cruise.station.cast.temp&cruise.station.cast.depth<100 + +# Filtered by location +?cruise.station.cast&cruise.station.latitude>30 +``` + +## Best Practices + +1. **Always check metadata first** - Use .dds/.dmr.xml to understand structure +2. **Test with .ascii** - Verify constraint expressions before using in code +3. **Use server functions** - Leverage geogrid(), linear_scale(), etc. +4. **Minimize data transfer** - Request only what you need +5. **Handle errors gracefully** - Check HTTP status codes and error messages +6. 
**Cache when possible** - Reuse metadata and frequently accessed subsets +7. **Document constraints** - Complex expressions can be hard to understand later diff --git a/windsurf-harnett/skills/opendap/references/DATA-MODEL.md b/windsurf-harnett/skills/opendap/references/DATA-MODEL.md new file mode 100644 index 000000000..f8c55e7d4 --- /dev/null +++ b/windsurf-harnett/skills/opendap/references/DATA-MODEL.md @@ -0,0 +1,602 @@ +# OPeNDAP Data Model + +This document provides comprehensive documentation of the OPeNDAP data model and type system. + +## Overview + +The OPeNDAP data model is designed to be general enough to represent data from various storage formats (NetCDF, HDF5, relational databases, etc.) while being specific enough to preserve the structure and relationships in the data. + +## Base Types + +Base types represent atomic data values. + +### Numeric Types + +**Byte** (8-bit signed integer): + +``` +Byte temperature; +Range: -128 to 127 +``` + +**Int16** (16-bit signed integer): + +``` +Int16 elevation; +Range: -32,768 to 32,767 +``` + +**Int32** (32-bit signed integer): + +``` +Int32 station_id; +Range: -2,147,483,648 to 2,147,483,647 +``` + +**Int64** (64-bit signed integer, DAP4 only): + +``` +Int64 timestamp; +Range: -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +``` + +**UInt16** (16-bit unsigned integer): + +``` +UInt16 count; +Range: 0 to 65,535 +``` + +**UInt32** (32-bit unsigned integer): + +``` +UInt32 pixel_value; +Range: 0 to 4,294,967,295 +``` + +**UInt64** (64-bit unsigned integer, DAP4 only): + +``` +UInt64 file_size; +Range: 0 to 18,446,744,073,709,551,615 +``` + +**Float32** (32-bit floating point): + +``` +Float32 temperature; +IEEE 754 single precision +``` + +**Float64** (64-bit floating point): + +``` +Float64 latitude; +IEEE 754 double precision +``` + +### String Types + +**String** (variable-length character string): + +``` +String station_name; +String comment; +``` + +**URL** (Uniform Resource Locator): + +``` +URL 
data_source; +``` + +## Constructor Types + +Constructor types build complex data structures from base types and other constructors. + +### Array + +Multi-dimensional arrays of any base or constructor type. + +**Syntax**: + +``` +Type name[dim1][dim2]...[dimN]; +``` + +**Examples**: + +``` +Float32 temperature[time=100][lat=50][lon=80]; +Int16 elevation[y=1000][x=1000]; +String station_names[stations=25]; +``` + +**Characteristics**: + +- Fixed dimensions at creation +- Homogeneous (all elements same type) +- Zero-indexed +- Rectangular (not ragged) + +**Subsetting**: + +``` +temperature[0:10][20:30][40:50] # Hyperslab +temperature[5][25][60] # Single element +temperature[0:2:100] # With stride +``` + +### Structure + +Named collection of variables (like a C struct). + +**Syntax**: + +``` +Structure { + Type1 field1; + Type2 field2; + ... +} name; +``` + +**Example**: + +``` +Structure { + Float64 latitude; + Float64 longitude; + Int32 elevation; + String name; +} station; +``` + +**Characteristics**: + +- Heterogeneous (different types) +- Named fields +- Accessed via dot notation: `station.latitude` + +**Use Cases**: + +- Grouping related metadata +- Station information +- Coordinate pairs +- Complex data records + +### Sequence + +Ordered collection of instances (like database rows). + +**Syntax**: + +``` +Sequence { + Type1 field1; + Type2 field2; + ... 
+} name; +``` + +**Example**: + +``` +Sequence { + Float64 time; + Float64 depth; + Float32 temperature; + Float32 salinity; +} cast; +``` + +**Characteristics**: + +- Variable length (number of instances unknown) +- Each instance is a Structure +- Can be filtered with selection expressions +- Accessed one instance at a time + +**Nested Sequences**: + +``` +Sequence { + Int32 station_id; + Float64 latitude; + Float64 longitude; + Sequence { + Float64 depth; + Float32 temperature; + Float32 salinity; + } measurements; +} station; +``` + +**Filtering**: + +``` +?cast&cast.temperature>20 +?cast.depth,cast.temperature&cast.depth<100 +``` + +### Grid + +Array with coordinate map vectors (georeferenced data). + +**Syntax**: + +``` +Grid { + Array: + Type array_name[dim1][dim2]...[dimN]; + Maps: + Type1 map1[dim1]; + Type2 map2[dim2]; + ... +} name; +``` + +**Example**: + +``` +Grid { + Array: + Int16 sst[time=1857][lat=89][lon=180]; + Maps: + Float64 time[time=1857]; + Float64 lat[lat=89]; + Float64 lon[lon=180]; +} sst; +``` + +**Characteristics**: + +- Combines array with coordinate variables +- Maps provide independent variable values +- Common for geospatial data +- Subsetting returns both array and corresponding maps + +**Use Cases**: + +- Gridded climate data +- Satellite imagery +- Model output +- Any regularly or irregularly spaced data + +## DAP4 Extensions + +DAP4 adds several enhancements to the data model. + +### Groups + +Hierarchical organization of variables (like HDF5 groups). + +**Example**: + +``` +Group { + Float64 time[time=100]; + + Group heights { + Float32 delta_time[time=100]; + Float64 lat_ph[time=100]; + Float64 lon_ph[time=100]; + } + + Group quality { + Int32 qa_flag[time=100]; + } +} gt3r; +``` + +**Access**: + +``` +?/gt3r/heights/lat_ph +?/gt3r/quality/qa_flag +``` + +### Opaque + +Binary data of unknown structure. 
+ +**Syntax**: + +``` +Opaque name; +``` + +**Use Cases**: + +- Embedded images +- Compressed data +- Proprietary formats +- Binary metadata + +### Enumeration + +Named integer constants (like C enum). + +**Syntax**: + +``` +Enum quality_flag { + good = 0, + questionable = 1, + bad = 2, + missing = 3 +}; +``` + +**Usage**: + +``` +quality_flag qa[time=100]; +``` + +## Data Model Translation + +### From NetCDF-3 to OPeNDAP + +**NetCDF-3 Dimensions** → **OPeNDAP Array dimensions**: + +``` +NetCDF: float temp(time, lat, lon); +OPeNDAP: Float32 temp[time][lat][lon]; +``` + +**NetCDF-3 Variables** → **OPeNDAP Grid** (if coordinate variables exist): + +``` +NetCDF: + float sst(time, lat, lon); + double time(time); + float lat(lat); + float lon(lon); + +OPeNDAP: + Grid { + Array: Float32 sst[time][lat][lon]; + Maps: Float64 time[time]; + Float32 lat[lat]; + Float32 lon[lon]; + } sst; +``` + +**NetCDF-3 Attributes** → **OPeNDAP DAS**: + +``` +NetCDF: sst:units = "degC"; +OPeNDAP DAS: + sst { + String units "degC"; + } +``` + +### From HDF5 to OPeNDAP + +**HDF5 Groups** → **DAP4 Groups**: + +``` +HDF5: /group1/subgroup/dataset +DAP4: /group1/subgroup/dataset +``` + +**HDF5 Datasets** → **OPeNDAP Arrays**: + +``` +HDF5: Dataset "temperature" [100][50][80] +DAP4: Float32 temperature[100][50][80]; +``` + +**HDF5 Compound Types** → **OPeNDAP Structures**: + +``` +HDF5 Compound: + { + "lat": H5T_NATIVE_DOUBLE, + "lon": H5T_NATIVE_DOUBLE, + "elev": H5T_NATIVE_INT + } + +OPeNDAP: + Structure { + Float64 lat; + Float64 lon; + Int32 elev; + } +``` + +### From Relational Database to OPeNDAP + +**Database Table** → **OPeNDAP Sequence**: + +``` +SQL Table: + CREATE TABLE stations ( + id INT, + lat DOUBLE, + lon DOUBLE, + name VARCHAR(50) + ); + +OPeNDAP: + Sequence { + Int32 id; + Float64 lat; + Float64 lon; + String name; + } stations; +``` + +**SQL WHERE** → **OPeNDAP Selection**: + +``` +SQL: SELECT * FROM stations WHERE lat > 0; +OPeNDAP: ?stations&stations.lat>0 +``` + +## Data 
Type Sizes + +| Type | Size (bytes) | Notes | +| ------- | ------------ | ------------------- | +| Byte | 1 | Signed | +| Int16 | 2 | Signed | +| Int32 | 4 | Signed | +| Int64 | 8 | Signed, DAP4 only | +| UInt16 | 2 | Unsigned | +| UInt32 | 4 | Unsigned | +| UInt64 | 8 | Unsigned, DAP4 only | +| Float32 | 4 | IEEE 754 | +| Float64 | 8 | IEEE 754 | +| String | Variable | Null-terminated | +| URL | Variable | Null-terminated | + +## Attributes + +Attributes provide metadata about variables and datasets. + +### Attribute Types + +All base types can be used as attributes: + +``` +String long_name "Sea Surface Temperature"; +String units "degrees_C"; +Float32 valid_min -5.0; +Float32 valid_max 45.0; +Int32 missing_value -9999; +``` + +### Attribute Containers + +**Variable attributes**: + +``` +sst { + String long_name "Sea Surface Temperature"; + String units "degC"; + Float32 scale_factor 0.01; +} +``` + +**Global attributes**: + +``` +NC_GLOBAL { + String title "COADS 1-degree Enhanced"; + String institution "NOAA/PMEL"; + String history "Created 2020-01-15"; +} +``` + +### Standard Attributes + +**CF Conventions**: + +- `units` - Physical units +- `long_name` - Descriptive name +- `standard_name` - CF standard name +- `valid_min`, `valid_max` - Valid range +- `_FillValue` - Missing data indicator +- `scale_factor`, `add_offset` - Packing parameters + +## Complex Data Examples + +### Time Series Station Data + +``` +Dataset { + Sequence { + Int32 station_id; + Float64 latitude; + Float64 longitude; + String station_name; + Sequence { + Float64 time; + Float32 temperature; + Float32 salinity; + Float32 pressure; + Int32 quality_flag; + } measurements; + } stations; +} ocean_observations; +``` + +### Satellite Imagery with Metadata + +``` +Dataset { + Structure { + String satellite_name; + String sensor_type; + Float64 acquisition_time; + Structure { + Float64 upper_left_lat; + Float64 upper_left_lon; + Float64 lower_right_lat; + Float64 lower_right_lon; + } bounds; + 
} metadata; + + Grid { + Array: + UInt16 radiance[line=1000][pixel=1000]; + Maps: + Float32 latitude[line=1000]; + Float32 longitude[pixel=1000]; + } image; +} satellite_scene; +``` + +### Climate Model Output + +``` +Dataset { + Grid { + Array: + Float32 temperature[time=365][level=50][lat=180][lon=360]; + Maps: + Float64 time[time=365]; + Float32 level[level=50]; + Float32 lat[lat=180]; + Float32 lon[lon=360]; + } air_temperature; + + Grid { + Array: + Float32 u_wind[time=365][level=50][lat=180][lon=360]; + Maps: + Float64 time[time=365]; + Float32 level[level=50]; + Float32 lat[lat=180]; + Float32 lon[lon=360]; + } eastward_wind; +} climate_model; +``` + +## Best Practices + +### Choosing Data Types + +1. **Use Grid for georeferenced data** with coordinate variables +2. **Use Sequence for variable-length data** (stations, profiles) +3. **Use Structure for related metadata** (location, time, etc.) +4. **Use appropriate numeric precision** (Float32 vs Float64) +5. **Use String for text data** (names, comments) + +### Data Organization + +1. **Group related variables** in Structures +2. **Use meaningful names** for variables and fields +3. **Include coordinate variables** for arrays +4. **Add comprehensive attributes** for metadata +5. **Follow conventions** (CF, COARDS, etc.) + +### Performance Considerations + +1. **Array layout matters** - row-major vs column-major +2. **Sequence access is sequential** - can't random access +3. **Grid subsetting is efficient** - server-side processing +4. **Structure access is fast** - all fields together +5. 
**String data is variable-length** - can impact performance diff --git a/windsurf-harnett/skills/opendap/references/PROTOCOL.md b/windsurf-harnett/skills/opendap/references/PROTOCOL.md new file mode 100644 index 000000000..6e8fcdfa1 --- /dev/null +++ b/windsurf-harnett/skills/opendap/references/PROTOCOL.md @@ -0,0 +1,380 @@ +# OPeNDAP Protocol Details + +This document provides detailed information about the Data Access Protocol (DAP) versions 2 and 4. + +## DAP2 Protocol + +DAP2 is the original OPeNDAP protocol, widely supported and stable. + +### DAP2 Responses + +**Dataset Descriptor Structure (DDS)**: + +- Describes the "shape" of the data +- C-like syntax showing variables, dimensions, and types +- Accessed via `.dds` suffix + +Example DDS: + +``` +Dataset { + Grid { + Array: + Int16 sst[time = 1857][lat = 89][lon = 180]; + Maps: + Float64 time[time = 1857]; + Float64 lat[lat = 89]; + Float64 lon[lon = 180]; + } sst; + Float64 time_bnds[time = 1857][nbnds = 2]; +} sst.mnmean.nc; +``` + +**Data Attribute Structure (DAS)**: + +- Contains metadata about variables +- Includes units, descriptions, valid ranges, etc. 
+- Accessed via `.das` suffix + +Example DAS: + +``` +Attributes { + sst { + String long_name "Monthly Means of Sea Surface Temperature"; + String units "degC"; + Float32 scale_factor 0.01; + Float32 add_offset 0.0; + Int16 missing_value 32767; + String valid_range "-500, 4500"; + } + NC_GLOBAL { + String title "COADS 1-degree Equatorial Enhanced"; + String history "Created by NOAA/PMEL"; + } +} +``` + +**Binary Data (.dods)**: + +- XDR-encoded binary data +- Includes DDS followed by data values +- Most efficient for data transfer + +### DAP2 Data Types + +**Simple Types**: + +- Byte, Int16, Int32 +- UInt16, UInt32 +- Float32, Float64 +- String, URL + +**Constructor Types**: + +- Array - Multi-dimensional arrays +- Structure - Named collection of variables +- Sequence - Ordered list of structures +- Grid - Array with coordinate maps + +### DAP2 Constraint Expression Format + +**Projection** (what to return): + +``` +?var1,var2,var3 +?structure.field1,structure.field2 +?array[start:stop] +``` + +**Selection** (filtering): + +``` +?var&var>value +?sequence&sequence.field<100 +``` + +## DAP4 Protocol + +DAP4 is the enhanced protocol with improved features and performance. 
+ +### DAP4 Responses + +**Dataset Metadata Response (DMR)**: + +- Unified XML document combining structure and attributes +- Accessed via `.dmr.xml` suffix +- Supports groups (hierarchical organization) + +Example DMR: + +```xml + + + + + + + + + + + Monthly Means of Sea Surface Temperature + + + degC + + + 0.01 + + + + + + + days since 1800-1-1 00:00:00 + + + +``` + +**Binary Data (.dap)**: + +- More efficient encoding than DAP2 +- Chunked transfer for large datasets +- Better compression support + +### DAP4 Enhancements + +**Groups**: + +- Hierarchical organization like HDF5 +- Namespace management +- Example: `/group1/subgroup/variable` + +**Enhanced Types**: + +- Opaque - Binary blobs +- Enum - Enumerated types +- 64-bit integers (Int64, UInt64) + +**Improved Constraint Expressions**: + +``` +?dap4.ce=/variable[0:10] +?dap4.ce=/group/variable&/group/variable>100 +``` + +### DAP4 Constraint Expression Format + +**Projection with namespace**: + +``` +?dap4.ce=/sst +?dap4.ce=/gt3r/heights/delta_time,/gt3r/heights/lon_ph +``` + +**Filters**: + +``` +?dap4.ce=/sst[0:100][0:50][0:80] +?dap4.ce=/sequence{field1,field2|field1>100} +``` + +## Protocol Comparison + +| Feature | DAP2 | DAP4 | +| ----------------- | ---------------- | ----------------- | +| Metadata | Separate DDS/DAS | Unified DMR (XML) | +| Groups | No | Yes | +| 64-bit integers | No | Yes | +| Encoding | XDR | More efficient | +| Chunking | Limited | Full support | +| Constraint syntax | Simple | Enhanced | +| Adoption | Universal | Growing | + +## Choosing DAP Version + +**Use DAP2 when**: + +- Maximum compatibility needed +- Working with older servers +- Simple data structures +- Established workflows + +**Use DAP4 when**: + +- Working with grouped data (HDF5-like) +- Need 64-bit integer support +- Large dataset performance critical +- Server supports DAP4 + +## HTTP Details + +### Request Methods + +**GET requests** for all operations: + +``` +GET /path/to/dataset.nc.dds HTTP/1.1 +Host: 
server.domain +``` + +### Response Headers + +``` +Content-Type: application/vnd.opendap.dds +Content-Type: application/vnd.opendap.das +Content-Type: application/vnd.opendap.data +Content-Type: text/xml (for DMR) +``` + +### Authentication + +**Basic HTTP Auth**: + +``` +http://username:password@server.org/data.nc +``` + +**.netrc file** (recommended): + +``` +machine server.org +login username +password mypassword +``` + +**OAuth/Bearer tokens** (server-dependent): + +``` +Authorization: Bearer +``` + +## Error Handling + +### DAP2 Errors + +Errors returned as text with HTTP error codes: + +``` +Error { + code = 404; + message = "Variable 'xyz' not found"; +}; +``` + +### DAP4 Errors + +XML error responses: + +```xml + + Variable not found: xyz + Dataset: /data/file.nc + Check variable name spelling + +``` + +### Common HTTP Status Codes + +- **200 OK** - Success +- **400 Bad Request** - Invalid constraint expression +- **401 Unauthorized** - Authentication required +- **404 Not Found** - Dataset or variable not found +- **500 Internal Server Error** - Server-side error + +## Performance Considerations + +### Bandwidth Optimization + +1. **Use constraint expressions** to subset at server +2. **Request only needed variables** in projection +3. **Use appropriate stride** for sampling +4. 
**Leverage server functions** for processing + +### Caching + +Servers may cache: + +- Metadata responses (DDS, DAS, DMR) +- Constraint expression results +- Aggregated datasets + +Clients should cache: + +- Metadata for repeated access +- Frequently accessed subsets + +### Compression + +DAP4 supports: + +- Chunked transfer encoding +- Gzip compression +- Server-side compression filters + +Request compressed responses: + +``` +Accept-Encoding: gzip, deflate +``` + +## Server Capabilities + +### Version Detection + +Query server version: + +``` +http://server.org/opendap/version +``` + +### Function Discovery + +List available server functions: + +``` +http://server.org/data.nc?version() +``` + +Get function help: + +``` +http://server.org/data.nc?function_name() +``` + +### Service Endpoints + +Standard endpoints: + +- `.dds` - DAP2 structure +- `.das` - DAP2 attributes +- `.dods` - DAP2 binary data +- `.dmr.xml` - DAP4 metadata +- `.dap` - DAP4 binary data +- `.ascii` - ASCII representation +- `.html` - Web form +- `.info` - Combined info page +- `.ver` - Version information + +## Protocol Extensions + +### Server-Side Processing + +Some servers support: + +- **Aggregation** - Combine multiple files +- **Subsetting** - Spatial/temporal subsetting +- **Transformation** - Unit conversion, reprojection +- **Statistics** - Mean, min, max calculations + +### Custom Functions + +Servers can implement custom functions: + +``` +?custom_function(variable, param1, param2) +``` + +Check server documentation for available functions. diff --git a/windsurf-harnett/workflows/implement.md b/windsurf-harnett/workflows/implement.md new file mode 100644 index 000000000..e7c9d10e8 --- /dev/null +++ b/windsurf-harnett/workflows/implement.md @@ -0,0 +1,134 @@ +--- +description: implement an issue +auto_execution_mode: 3 +--- + +# Issue Implementation Workflow + +This workflow implements GitHub issues by extracting planning information and following structured implementation plans. 
+ +**Usage**: `/implement [issue_number]` or `Implement issue [issue_number]` + +## Implementation Process + +### Phase 1: Issue Analysis & Planning Extraction + +1. **Fetch GitHub Issue Details** + - Use `mcp0_issue_read` to get issue body, labels, and metadata + - Use `mcp0_issue_read` with `get_comments` to retrieve all comments + - Parse issue title, state, and assignee information + +2. **Extract Implementation Plan from Issue** + - Look for these sections in issue body and comments: + - "Implementation Plan", "Implementation Steps", or "Implementation Roadmap" + - "Requirements & Acceptance Criteria" + - "Technical Details" or "Technical Approach" + - "Dependencies" and "Testing Requirements" + - Extract numbered/bulleted task lists with time estimates + - Identify acceptance criteria with checkboxes + - Parse technical specifications and code examples + +3. **Create Task Management Structure** + - Initialize todo list with extracted implementation steps + - Mark first step as `in_progress` + - Include dependencies, testing, and documentation tasks + +### Phase 2: Context & Architecture Review + +4. **Review Project Documentation** + - Read `docs/design.md` for architecture and design patterns + - Read `docs/prd.md` for product requirements and specifications + - Check for relevant format-specific documentation (e.g., GeoTIFF, CDF) + +5. **Examine Codebase Context** + - Locate relevant source files mentioned in the issue + - Review existing implementation patterns + - Identify similar functionality for reference + - Check build system integration points + +### Phase 3: Structured Implementation + +6. 
**Execute Implementation Steps** + For each step in the extracted plan: + - **Code Implementation**: Write/modify code following existing patterns + - **Documentation Updates**: Update relevant docs if specified + - **Testing**: Implement unit/integration tests as required + - **Validation**: Verify acceptance criteria are met + - **Progress Tracking**: Mark step complete, update todo list + +7. **Handle Dependencies** + - Verify all dependencies are satisfied before implementation + - Check for blocking issues or prerequisites + - Ensure build system changes are consistent across CMake/Autotools + +### Phase 4: Quality Assurance & Completion + +8. **Testing & Validation** + - Run unit tests for new/modified code + - Execute integration tests with sample data + - Verify no regression in existing functionality + - Check memory management and error handling + +9. **Definition of Done Verification** + - All acceptance criteria met + - Tests passing with required coverage + - Documentation updated + - No regressions introduced + - Code follows project patterns + +10. 
**Final Integration** + - Run full test suite using `/btest` workflow + - Update GitHub issue with implementation status + - Mark issue as ready for review/close if appropriate + +## Implementation Guidelines + +### Code Quality Standards + +- Follow existing code style and patterns +- Maintain backward compatibility +- Use established error handling patterns +- Ensure proper memory management +- Add appropriate logging and debugging support + +### Testing Requirements + +- Unit tests for all new functions +- Integration tests for new features +- Error handling path testing +- Memory leak validation +- Performance testing if applicable + +### Documentation Standards + +- Update API documentation for new functions +- Add inline comments for complex logic +- Update design documents if architecture changes +- Include usage examples in relevant docs + +## Error Handling + +- If issue lacks clear implementation plan, redirect to `/issue` workflow first +- If dependencies are missing, document and block implementation +- If tests fail, debug and fix before proceeding +- If acceptance criteria unclear, ask for clarification on GitHub issue + +## Progress Tracking + +- Use todo_list to track implementation steps +- Update GitHub issue with progress comments +- Mark tasks complete as they are finished +- Document any deviations from the original plan + +## Example Implementation Flow + +For issue 92 (CRS Metadata Extraction): + +1. Fetch issue → Extract 7-step implementation plan +2. Review docs/design.md → Understand GeoTIFF architecture +3. Examine src/geotifffile.c → Locate placeholder code +4. Implement Step 1: Add CRS data structures → Update header +5. Implement Step 2: extract_crs_parameters() → Write function +6. 
Continue through all steps → Run tests → Update GitHub + +Example usage: "/implement 60" or "Implement issue 60" diff --git a/windsurf-harnett/workflows/issue.md b/windsurf-harnett/workflows/issue.md new file mode 100644 index 000000000..11b34e4af --- /dev/null +++ b/windsurf-harnett/workflows/issue.md @@ -0,0 +1,237 @@ +--- +description: Refine GitHub issues and generate implementation plans +--- + +# Issue Refinement Workflow + +This workflow helps refine GitHub issues systematically to clarify requirements, identify dependencies, and generate actionable implementation plans. + +**This workflow refines issues and generates implementation plans—no code implementation.** + +1. **Issue Assessment & Validation**: Review and validate the GitHub issue +2. **Documentation Research**: Consult project documentation for architectural context +3. **Codebase Context**: Examine existing code patterns and similar implementations +4. **Requirements Refinement**: Ask targeted clarifying questions +5. **Dependency Analysis**: Check for dependencies and conflicts +6. **Implementation Plan Generation**: Create structured, actionable plan +7. **GitHub Documentation**: Record refined requirements and implementation plan + +**The output is a refined issue with a clear implementation plan. Code implementation happens in a separate workflow.** + +--- + +## Step 1: Issue Analysis & Validation + +First, thoroughly examine the GitHub issue to understand: + +- The problem description and context +- Current behavior vs. 
expected behavior +- Any error messages, logs, or screenshots provided +- Existing discussion or comments + +**Validate issue quality:** + +- ✓ Issue title is clear and specific +- ✓ Problem description is understandable +- ✓ Expected behavior is defined +- ✓ At least one reproduction step (for bugs) +- ✓ Environment details provided (if relevant) +- **If issue is incomplete:** Ask for missing information before proceeding + +--- + +## Step 2: Consult Project Documentation + +Review the project's documentation to gather architectural context and development guidance: + +- **Examine docs/design.md** for system architecture, design patterns, and technical specifications +- **Check docs/prd.md** for product requirements, feature specifications, and business requirements +- **Review docs/prfaq.md** for frequently asked questions, common issues, and implementation guidance +- **Check README.md** for build instructions, dependencies, and project overview +- Look for relevant design decisions or technical specifications in the documentation +- Understand established patterns and conventions used in the project + +--- + +## Step 3: Examine Codebase Context + +**Search for similar implementations:** + +- Look for related functionality in `src/`, `include/`, `test/` +- Identify existing NC_Dispatch implementations to follow +- Review error handling patterns and NetCDF error codes used +- Check memory management patterns (allocation, cleanup) +- Note build system patterns (CMake, Autotools) + +**Check GitHub context:** + +- Search for related existing issues (open and closed) +- Check for relevant pull requests +- Note any blocking or related issues +- Identify available milestones or project boards + +--- + +## Step 4: Clarifying Questions + +Ask 3-7 targeted questions to better understand the problem and requirements. Focus on: + +**For bug reports:** + +- Reproduction steps and environment details +- Expected vs. 
actual behavior +- Error messages or logs +- When the issue started occurring +- Any recent changes that might be related + +**For feature requests:** + +- Specific use cases and workflows +- Performance requirements or constraints +- Integration with existing functionality +- User interface considerations +- Backward compatibility requirements + +**For documentation issues:** + +- Which documentation needs updating +- Target audience (users, developers, contributors) +- Missing or unclear information +- Examples that would be helpful + +Format questions as multiple choice with a recommended answer when appropriate. + +--- + +## Step 5: Dependency Analysis + +**Identify dependencies and conflicts:** + +- Check if this issue depends on other open issues +- Look for potential conflicts with in-progress work +- Identify external dependencies (libraries, systems, teams) +- Note any blocking issues or prerequisites + +**Document dependencies:** + +- List dependent issues with issue numbers +- Identify any work that must be completed first +- Note any conflicts that need resolution + +--- + +## Step 6: Implementation Plan Generation (NOT Implementation) + +Based on the answers provided, generate a structured implementation plan: + +**Plan Structure:** + +```markdown +## Executive Summary + + + +## Requirements & Acceptance Criteria + +- [ ] +- [ ] +- [ ] + +## Implementation Approach + + + +## Implementation Steps + +1. - +2. - +3. 
- + +## Dependencies + +- Depends on # - +- Blocks # - + +## Testing Requirements + +- [ ] Unit tests for all new or modified functions +- [ ] Unit tests for error handling paths +- [ ] Integration test for +- [ ] Manual testing for +- [ ] Test coverage verification (minimum 80% for new code) + +## Risks & Mitigations + +- + +## Notes + + +``` + +**Quality checks:** + +- ✓ Steps are concrete and actionable +- ✓ Dependencies are clearly identified +- ✓ Testing requirements are specific +- ✓ Effort estimates are reasonable +- ✓ Plan follows project conventions + +**Goal: Create a clear, actionable plan that can be executed in a separate implementation workflow.** + +--- + +## Step 7: GitHub Documentation + +Create a comprehensive follow-up comment on the GitHub issue that documents the refined requirements and implementation plan. This serves as both a record and a roadmap for future implementation. + +**Comment structure:** + +```markdown +## Issue Refinement Summary + +### Executive Summary + + + +### Requirements & Acceptance Criteria + +- [ ] +- [ ] +- [ ] + +### Implementation Plan + + + +### Next Steps + + +``` + +**Posting to GitHub:** +Use the `gh` command line tool to post the comment to the issue: + +```bash +gh issue comment <issue-number> --body "your comment text here" +``` + +--- + +## Step 8: Final Review & Implementation Roadmap + +Review the complete analysis for completeness and accuracy, then append the finalized implementation roadmap to the bottom of the GitHub follow-up comment.
The roadmap should include: + +- A numbered list of concrete implementation steps (to be executed in a separate workflow) +- Owner/assignee for each major step +- Estimated completion dates or effort +- Dependencies between steps +- Success criteria for each milestone + +**Final validation:** + +- ✓ All questions have been answered +- ✓ Requirements are clear and testable +- ✓ Implementation plan is technically sound +- ✓ Dependencies are documented +- ✓ Testing strategy is comprehensive diff --git a/windsurf-harnett/workflows/release.md b/windsurf-harnett/workflows/release.md new file mode 100644 index 000000000..03ca43910 --- /dev/null +++ b/windsurf-harnett/workflows/release.md @@ -0,0 +1,185 @@ +--- +description: Generate GitHub release notes from issues and projects +auto_execution_mode: 3 +--- + +Generate professional GitHub release notes for a specific version by extracting information from GitHub issues, pull requests, and project boards, following the established release notes format. + +## Prerequisites + +1. Identify the version to document (user will specify, e.g., "0.5.0", "1.0.0") +2. Check `docs/releases/` for the latest release notes format examples (v0.4.0.md is comprehensive) +3. Determine the GitHub milestone or project board associated with this version +4. Get GitHub repository details (owner: Intelligent-Data-Design-Inc, repo: NEP) + +## Steps + +### 1. 
Gather Issues and PRs for Version + +Use GitHub MCP tools to collect: + +- **Closed issues** in the version milestone (if milestone exists) +- **Merged pull requests** associated with the version +- **Project board items** (if using GitHub Projects for version tracking) +- **Release date**: Use the date of the last merged PR or current date + +Query strategy: + +``` +# Search for issues closed in version +mcp0_search_issues: query="milestone:v{VERSION} is:closed" + +# Search for merged PRs in version +mcp0_search_pull_requests: query="is:merged milestone:v{VERSION}" + +# Alternative: Search by labels if no milestone +mcp0_search_issues: query="label:v{VERSION} is:closed" +``` + +### 2. Categorize Issues by Type + +Analyze collected issues and PRs, grouping them by: + +- **Features**: New functionality (label: enhancement, feature) +- **Bug Fixes**: Resolved issues (label: bug, fix) +- **Build System**: CMake, Autotools changes (label: build) +- **Testing**: Test coverage, new tests (label: testing) +- **Documentation**: Docs updates (label: documentation) +- **Dependencies**: New or updated libraries +- **API Changes**: NetCDF API extensions, dispatch changes +- **Performance**: Optimizations, benchmarks + +Extract from each issue: + +- Title and issue number +- Description/summary +- Implementation details from PR description +- Related files/components modified + +### 3. Review Existing Release Notes Format + +Examine the most recent release notes (e.g., `docs/releases/v0.4.0.md`) to understand: + +- Section structure and order +- Writing style (professional, concise, technical) +- How features are organized by category +- Level of technical detail expected + +### 4. 
Create Release Notes Structure + +Generate a markdown file named `docs/releases/v{VERSION}.md` with these sections: + +**Required Sections:** + +- **Title & Subtitle**: Version number + main theme (derived from most impactful features), followed by `>` blockquote with one-line description +- **Highlights**: 4-6 bullet points of major achievements (start with bold emphasis, reference issue numbers) +- **Features**: Organized by functional areas with detailed descriptions + - For each feature: "**[Feature Name]** (#123) - Description with implementation details" + - Group by: UDF handlers, compression, build system, file format support, etc. +- **Bug Fixes**: List significant bugs resolved with issue numbers + - Format: "**[Bug description]** (#456) - Resolution details" +- **API Changes**: New/modified NetCDF API extensions, NC_Dispatch implementations (if applicable) +- **Build System**: CMake and Autotools changes, new dependencies, configuration options +- **Testing**: Coverage metrics, test strategies, new test files added +- **Documentation**: Doxygen updates, API documentation, user guides (if applicable) +- **Dependencies Added**: New libraries (HDF5, NetCDF-C, NCEPLIBS-g2, etc.) with versions +- **Known Limitations**: Open issues or deferred work (link to GitHub issues) +- **Breaking Changes**: None or list with migration guidance + +**Footer Section:** + +- **Released**: Date (format: YYYY-MM-DD, use last merged PR date or current date) +- **Scope**: List GitHub milestone or project board name +- **Issues Closed**: Total count of issues/PRs included +- **Contributors**: GitHub usernames of PR authors (if multiple contributors) + +### 5. 
Writing Guidelines + +- **Be concise**: Keep total length under 100-120 lines (~1 page when rendered) +- **Use technical language**: Include file paths, function names, specific implementations +- **Reference issues**: Always include issue/PR numbers in format (#123) +- **Emphasize user impact**: Start features with user-facing benefits +- **Cross-reference docs**: Link to architecture docs, API specs, testing guides +- **Maintain consistency**: Match tone and structure of previous releases +- **Prioritize information**: Most important features first, details second +- **Credit contributors**: Mention PR authors when appropriate + +### 6. Content Transformation Rules + +When converting GitHub issues/PRs to release notes: + +- **Issue titles → Feature descriptions**: Expand with context and implementation details from PR +- **PR descriptions → Implementation details**: Extract what was built, where in codebase, which files modified +- **Labels → Section assignment**: Use issue labels to categorize into appropriate sections +- **Closed issues → Highlights**: Select 4-6 most impactful closed issues for Highlights section +- **Open issues → Known Limitations**: Reference deferred or incomplete work +- **PR merge dates → Release date**: Use the latest merge date as release date +- **Multiple related issues → Single feature**: Combine related issues into cohesive feature descriptions + +### 7. 
Quality Checks + +Before finalizing: + +- ✅ Version number matches milestone/project (format: v0.X.0 or v1.X.0) +- ✅ File name matches version (e.g., `v0.5.0.md` for Version 0.5.0) +- ✅ All issue numbers are valid and link correctly (#123 format) +- ✅ All sections present (even if "None" or "N/A") +- ✅ Length is under 100-120 lines +- ✅ No placeholder text like "[TODO]" or "[INSERT]" +- ✅ Technical terms are accurate (verify file paths, function names) +- ✅ Consistent formatting (bold for emphasis, code blocks for file paths) +- ✅ Issue/PR counts match actual GitHub data +- ✅ Contributors are credited appropriately + +### 8. Finalization + +- Save the file to `docs/releases/v{VERSION}.md` +- Do NOT commit or push (per user rules) +- Provide summary: "Release notes for v{VERSION} generated from {N} issues and {M} PRs" +- Inform the user the release notes are ready for review + +## Example Usage + +**Scenario 1: Version with Milestone** + +``` +User: "Create release notes for version 0.5.0" +→ Search GitHub for milestone:v0.5.0 issues and PRs +→ Categorize by labels (enhancement, bug, build, etc.) +→ Generate `docs/releases/v0.5.0.md` +→ Follow format from latest release in docs/releases/ +→ Include issue numbers (#123) for all features/fixes +→ Keep under 120 lines +``` + +**Scenario 2: Version with Label-based Tracking** + +``` +User: "Generate release notes for v0.6.0" +→ Search for label:v0.6.0 closed issues +→ Search for merged PRs with v0.6.0 label +→ Extract implementation details from PR descriptions +→ Group by functional area (GeoTIFF, CDF, build system, etc.) 
+→ Generate release notes with proper attribution +``` + +**Scenario 3: Version from Project Board** + +``` +User: "Create release notes from project 'Version 1.0 Release'" +→ Query GitHub project board items +→ Filter for completed/closed items +→ Extract linked issues and PRs +→ Generate comprehensive release notes +→ Credit all contributors +``` + +## Notes + +- If no milestone/label/project exists for the version, ask user to clarify which issues/PRs to include +- If some issues are still open, note them in "Known Limitations" section +- Adapt section order based on version content (e.g., skip "Dependencies" if none added) +- Use past tense for completed work, present tense for capabilities added +- Always verify issue/PR numbers are valid before including in release notes +- For versions with many issues (>20), prioritize most impactful features in Highlights +- If PR descriptions are sparse, read the actual code changes to understand implementation details diff --git a/windsurf-harnett/workflows/review.md b/windsurf-harnett/workflows/review.md new file mode 100644 index 000000000..114fd6e2b --- /dev/null +++ b/windsurf-harnett/workflows/review.md @@ -0,0 +1,72 @@ +--- +description: Review pull request and add code review comments +--- + +# Pull Request Code Review Workflow + +This workflow performs a comprehensive code review on a pull request and adds review comments directly to the PR. + +## Steps + +1. **Fetch pull request details** + - Get PR information using the pull request number + - Retrieve the list of changed files + - Get the diff for each changed file + - Identify added, modified, and deleted lines + +2. 
**Perform comprehensive code review** + - Analyze code quality and adherence to best practices + - Identify potential bugs or edge cases + - Look for performance optimization opportunities + - Assess readability and maintainability + - Check for security concerns + - Review documentation completeness + - Check for code duplication + - Verify error handling patterns + - Ensure unit tests are included for new/modified code + +3. **Generate review comments** + - Create a list of code review comments for each issue found + - For each comment, include: + - **File**: The file being commented on + - **Line**: Specific line number (or range) + - **Type**: Suggestion/Issue/Question/Praise + - **Comment**: Detailed feedback or question + - **Severity**: Critical/High/Medium/Low + +4. **Add review comments to PR** + - For each review comment, ask: "Add this comment to PR? (yes/no)" + - If user says **yes**: Add the comment as a PR review comment using the appropriate line + - If user says **no**: Skip and move to next comment + - Continue until all comments are processed + +5. **Submit overall review** + - Ask user for overall review status: "Approve", "Request Changes", or "Comment" + - Add summary comment with overall assessment + - Submit the review with the chosen status + +6. 
**Summary** + - Report how many comments were added + - List the types of issues found + - Note the overall review status + +## Usage + +``` +/review <pr-number> +``` + +Example: + +``` +/review 123 +``` + +## Notes + +- Comments are added as pull request review comments on specific lines +- Use appropriate comment types (suggestion, issue, question, praise) +- Include code references using GitHub's line selection +- Review focuses on code quality, not creating separate issues +- For critical issues that need separate tracking, suggest creating issues in comments +- Group related issues when appropriate diff --git a/windsurf-harnett/workflows/roadmap.md b/windsurf-harnett/workflows/roadmap.md new file mode 100644 index 000000000..ca85ce274 --- /dev/null +++ b/windsurf-harnett/workflows/roadmap.md @@ -0,0 +1,237 @@ +--- +description: Break version work into detailed sprint planning documents +--- + +# Version Roadmap Planning Workflow + +This workflow breaks down a version's work into 1-5 detailed sprint planning documents. + +## Usage + +``` +/roadmap <version> <description> +``` + +**Example:** `/roadmap v1.6.0 Add CF conventions for GeoTIFF metadata` + +**Output:** Creates version-specific GitHub Project "NEP Version 1.6.0 - GeoTIFF metadata to CF" with sprint breakdown + +**Note:** Only v1.6.0+ versions are supported. Historical versions are not updated.
+ +--- + +## Scope + +**Do:** + +- Analyze the specified version's work and break it into 1-5 sprints +- Identify technical gaps and missing requirements across all sprints +- Create version-specific GitHub Project with sprint breakdown and task organization +- Ensure logical dependency flow between sprints + +**Don't:** + +- Modify other versions not specified +- Implement code (save for `/plan` workflow) +- Create more than 5 sprints for a version +- Skip dependency analysis between sprints +- Add code changes to GitHub Project (use Issues/PRs instead) +- Update historical versions (v1.5.0 and earlier) - these are frozen + +--- + +## Project Creation Process + +### Naming Convention + +Version-specific projects follow the pattern: `NEP Version {version} - {brief description}` + +**Examples:** + +- "NEP Version 1.6.0 - GeoTIFF metadata to CF" +- "NEP Version 1.7.0 - GRIB2 write support" +- "NEP Version 1.8.0 - Performance optimization" + +### Project Structure + +Each version-specific project includes: + +- **Organization-level project** (not repository-specific) +- **Standard fields**: Version, Sprint, Task Status, Priority, Component +- **Field values**: + - Version: "{version}" for all issues + - Sprint: 1-5 for respective sprint issues + - Task Status: Backlog → In Progress → Review → Done + - Priority: High/Medium/Low + - Component: Build System/UDF Handler/Tests/Documentation +- **Issues**: Main version issue + individual sprint issues + +### Implementation Tools + +- **GitHub CLI** (`gh project create`) for automated creation +- **GitHub API** as fallback for advanced configuration +- **Manual creation** option when automation fails + +--- + +## Steps + +### 1. 
Validate version and create version-specific GitHub Project + +- Check if version-specific GitHub Project exists for NEP organization +- **If no project exists:** Create new organization-level GitHub Project with: + - Project name: "NEP Version {version} - {brief description}" (e.g., "NEP Version 1.6.0 - GeoTIFF metadata to CF") + - Owner: Intelligent-Data-Design-Inc (organization level) + - Fields: Version (text), Sprint (number), Task Status (single select: Backlog, In Progress, Review, Done), Priority (single select: High, Medium, Low), Component (single select: Build System, UDF Handler, Tests, Documentation) + - Add all relevant issues to the project +- Query GitHub Projects for the specified version +- **If version is v1.5.0 or earlier:** Report that historical versions are not supported and stop workflow + +### 2. Extract version details + +- Locate the specified version in GitHub Project +- **If version exists but has no items:** Create initial project items for the version based on: + - User-provided version description from the command prompt + - Known technical components and requirements + - Standard sprint structure (file operations, metadata, data reading, etc.) +- Extract all existing work items, goals, and requirements from project items +- Identify any existing sprint breakdown from project fields +- Note incomplete or ambiguous requirements from item descriptions +- Assess total scope and complexity from linked Issues/PRs + +### 3. Review related documentation for context + +- Check `docs/prd.md` for product requirements and functional specifications +- Review `docs/design.md` for architecture and technical design +- Review existing sprint plans in `docs/plan/` to understand format and detail level +- Check source code in `src/`, `include/`, and `test/` for existing patterns + +### 4. 
Analyze version scope and identify optimal sprint breakdown + +- Review all work items for completeness and clarity +- Use the user-provided version description as the primary source for requirements +- Group related tasks into logical sprint units (1-5 sprints total) +- Identify dependencies between task groups +- Ensure each sprint has a coherent theme and deliverable +- Check for missing implementation details (NC_Dispatch functions, data structures, error handling) +- Verify testing requirements (C unit tests, Fortran tests, integration tests) +- Assess build system integration (CMake and Autotools) +- Identify dependency requirements and version constraints + +### 5. Ask 3-6 numbered clarifying questions + +**Focus areas:** + +- Sprint organization and dependency flow based on user-provided description +- Ambiguous requirements or missing technical details from the initial prompt +- Error handling strategies and NetCDF error code mapping +- Testing expectations and coverage targets per sprint +- Build system integration and dependency detection +- Memory management and resource cleanup +- Integration with native format libraries (NCEPLIBS-g2, libgeotiff, NASA CDF) + +**Question format:** + +- Provide suggested answers, labeled with letters, so questions can be answered with just a letter +- Recommend an answer and provide justification for your recommendation +- Ask minimum 3 questions, maximum 6 questions + +**⚠️ ASK QUESTIONS NOW** + +**Wait for user responses before proceeding** + +**If answers are incomplete:** + +- Ask follow-up questions to clarify +- Maximum 2 rounds of questions total + +### 6. 
Update roadmap with sprint breakdown + +- Incorporate answers from step 5 into GitHub Project +- Create 1-5 sprint sections using project fields and organization +- Distribute work items logically across sprints by updating item fields +- Clarify ambiguous tasks with specific details in item descriptions +- Add dependency information between project items +- Update definition of done for each sprint in project metadata +- Ensure each sprint builds incrementally toward version goals +- Link related Issues/PRs to project items for implementation + +**Important:** + +- **DO NOT** add proposed code changes to project item descriptions +- Code implementation should be tracked in separate Issues/PRs linked to project items +- If example code is presented by the user, that may be included in item descriptions + +### 7. Update related documentation (if needed) + +**Only update if clarifications revealed:** + +- Architecture changes → Update `docs/design.md` +- Functional requirement changes → Update `docs/prd.md` +- NC_Dispatch or UDF handler design notes → Add to `docs/design.md` + +**Skip this step if:** No architectural or functional changes were clarified + +### 8. 
Verify completion criteria + +**Confirm all items are satisfied:** + +- ✓ Version work is broken into 1-5 logical sprints +- ✓ Each sprint has a coherent theme and clear deliverables +- ✓ Dependencies between sprints are documented +- ✓ All sprint tasks are clearly defined +- ✓ Implementation approach is specified for each major task +- ✓ Testing requirements are identified for each sprint +- ✓ Build system integration is addressed +- ✓ Dependencies are documented +- ✓ Error handling strategy is defined +- ✓ GitHub Project is updated with all clarifications and sprint organization + +**If any criteria are missing:** Return to step 5 and ask additional questions + +**Next step:** User can run `/plan sprint <sprint-number>` to create detailed implementation plan for individual sprints + +--- + +## Error Handling + +**If version not found:** + +- Report "Version {version} not found in GitHub Project" +- List available versions from project +- Stop workflow + +**If version is v1.5.0 or earlier:** + +- Report "Historical versions (v1.5.0 and earlier) are not supported" +- Stop workflow + +**If version scope is too large for 5 sprints:** + +- Report: "Version {version} work is too complex for 5 sprints" +- Suggest breaking into multiple versions or reducing scope +- Ask user how to proceed + +**If version has insufficient work for 1 sprint:** + +- Report: "Version {version} has minimal work, consider combining with another version" +- Ask user if they want to proceed with a single sprint + +**If dependencies not met:** + +- Report: "Previous version dependencies not satisfied" +- List missing dependencies +- Ask user if they want to proceed anyway + +**If GitHub Project creation fails:** + +- Report specific GitHub API errors for project creation +- Suggest manual project creation with the naming convention "NEP Version {version} - {brief description}" +- Provide field configuration details for manual setup +- Stop workflow + +**If GitHub Project operations fail:** + +- Report specific
GitHub API errors +- Suggest manual project updates as fallback +- Continue with documentation updates if possible diff --git a/windsurf-harnett/workflows/test.md b/windsurf-harnett/workflows/test.md new file mode 100644 index 000000000..012cdc031 --- /dev/null +++ b/windsurf-harnett/workflows/test.md @@ -0,0 +1,121 @@ +--- +description: run C/Fortran tests and validate implementation +auto_execution_mode: 3 +--- + +# C/Fortran Testing and Validation Workflow + +This workflow runs all C and Fortran tests to validate implementation against issue requirements. + +## Prerequisites + +Ensure the project is built with both CMake and Autotools build systems: + +```bash +cd /home/ed/NEP +# CMake build +mkdir -p build && cd build +cmake .. +make +cd .. +# Autotools build +./autogen.sh +mkdir -p build_autotools && cd build_autotools +../configure +make +cd .. +``` + +## Steps + +### 1. Parse Issue Implementation Plan + +- Read the GitHub issue to identify testing requirements from the implementation plan +- Extract specific test scenarios mentioned in the issue +- Note any special testing requirements (edge cases, error conditions, etc.) + +### 2. Run CMake Tests (C tests) + +// turbo + +```bash +cd /home/ed/NEP/build +ctest --output-on-failure +``` + +**Expected:** All C tests pass. Tests include LZ4 compression tests and UDF handler tests. + +### 3. Run Autotools Tests (C tests) + +// turbo + +```bash +cd /home/ed/NEP/build_autotools/test +./run_tests.sh +``` + +**Expected:** All C tests pass with proper HDF5 plugin path configuration. + +### 4. Run Fortran Tests (if built) + +// turbo + +```bash +cd /home/ed/NEP/build_autotools/ftest +./run_tests.sh +``` + +**Expected:** All Fortran tests pass. Tests include nep compression tests. + +### 5. 
Verify Test Coverage Requirements + +// turbo + +```bash +cd /home/ed/NEP/build +# Generate coverage report +gcov -r ../src/*.c ../src/*.h +# Calculate coverage percentage: gcov prints "Lines executed:NN.NN% of M", +# so split on ':' and '%' to pull out the percentage field +coverage=$(gcov -r ../src/*.c | grep "Lines executed:" | awk -F'[:%]' '{sum += $2} END {if (NR) print sum/NR}') +echo "Coverage: $coverage%" +``` + +**Expected:** Minimum 80% coverage for new/modified code as specified in issue requirements. + +### 6. Check for Compiler Warnings + +```bash +cd /home/ed/NEP/build +make clean +make VERBOSE=1 2>&1 | grep -i "warning" +``` + +**Expected:** No new warnings introduced. Review any warnings and fix if critical. + +### 7. Report Results to GitHub Issue + +Post a summary comment on the GitHub issue with test results: + +- Test execution status (pass/fail) +- Coverage percentage (meets requirements?) +- Any warnings found +- Link to detailed test logs if needed + +## Success Criteria + +✅ **CMake Tests**: All CTest tests pass (LZ4, UDF handlers) +✅ **Autotools C Tests**: All tests in test/run_tests.sh pass +✅ **Fortran Tests**: All tests in ftest/run_tests.sh pass (if Fortran enabled) +✅ **Test Coverage**: Minimum 80% coverage for new/modified code +✅ **Compiler Warnings**: No critical warnings introduced +✅ **GitHub Reporting**: Results posted to issue with clear status + +## Troubleshooting + +- **Test failures**: Check test output for specific failures, verify HDF5_PLUGIN_PATH is set correctly +- **Missing dependencies**: Ensure HDF5, NetCDF-C, NetCDF-Fortran, and LZ4 are installed +- **Plugin path issues**: Verify HDF5_PLUGIN_PATH includes the LZ4 plugin directory +- **Build failures**: Clean build directories and rebuild from scratch +- **GRIB2 test failures**: Ensure NCEPLIBS-g2 is installed if GRIB2 UDF handler is enabled + +**ALL TESTS MUST PASS, NO TESTS MAY BE LEFT BROKEN**