diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a00ad83c2c..7500f6c949 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -191,12 +191,17 @@ jobs: MINGW_ASM_MASM_COMPILER: llvm-ml MINGW_ASM_MASM_FLAGS: -m64 - name: Android (API 21, NDK 23) - os: macos-15-large + os: ubuntu-latest ANDROID_API: 21 ANDROID_NDK: 23.2.8568313 ANDROID_ARCH: x86_64 + - name: Android (API 26, NDK 27) + os: ubuntu-latest + ANDROID_API: 26 + ANDROID_NDK: 27.3.13750724 + ANDROID_ARCH: x86_64 - name: Android (API 31, NDK 27) - os: macos-15-large + os: ubuntu-latest ANDROID_API: 31 ANDROID_NDK: 27.3.13750724 ANDROID_ARCH: x86_64 @@ -242,12 +247,12 @@ jobs: cache: "pip" - name: Check Linux CC/CXX - if: ${{ runner.os == 'Linux' && !matrix.container }} + if: ${{ runner.os == 'Linux' && !env['ANDROID_API'] &&!matrix.container }} run: | [ -n "$CC" ] && [ -n "$CXX" ] || { echo "Ubuntu runner configurations require toolchain selection via CC and CXX" >&2; exit 1; } - name: Installing Linux Dependencies - if: ${{ runner.os == 'Linux' && !env['TEST_X86'] && !matrix.container }} + if: ${{ runner.os == 'Linux' && !env['TEST_X86'] && !env['ANDROID_API'] && !matrix.container }} run: | sudo apt update # Install common dependencies @@ -278,7 +283,7 @@ jobs: sudo make install - name: Installing Linux 32-bit Dependencies - if: ${{ runner.os == 'Linux' && env['TEST_X86'] && !matrix.container }} + if: ${{ runner.os == 'Linux' && env['TEST_X86'] && !env['ANDROID_API'] &&!matrix.container }} run: | sudo dpkg --add-architecture i386 sudo apt update @@ -357,6 +362,22 @@ jobs: with: gradle-home-cache-cleanup: true + - name: Setup .NET for Android + if: ${{ env['ANDROID_API'] }} + uses: actions/setup-dotnet@v5 + with: + dotnet-version: '10.0.x' + + - name: Install .NET Android workload + if: ${{ env['ANDROID_API'] }} + run: dotnet workload restore tests/fixtures/dotnet_signal/test_dotnet.csproj + + - name: Enable KVM group perms + if: ${{ runner.os == 'Linux' && env['ANDROID_API'] }} + run: | + echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' | sudo tee /etc/udev/rules.d/99-kvm4all.rules + sudo udevadm control --reload-rules + sudo udevadm trigger --name-match=kvm - name: Add sentry.native.test hostname if: ${{ runner.os == 'Windows' }} @@ -386,7 +407,7 @@ jobs: api-level: ${{ env.ANDROID_API }} ndk: ${{ env.ANDROID_NDK }} arch: ${{ env.ANDROID_ARCH }} - target: google_apis + target: default emulator-boot-timeout: 1200 script: | # Sync emulator clock with host to avoid timestamp assertion failures diff --git a/CHANGELOG.md b/CHANGELOG.md index a57e0d348d..880dedccdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ # Changelog -## Unreleased: +## Unreleased **Fixes**: - inproc: only the handling thread cleans up after the crash. ([#1579](https://github.com/getsentry/sentry-native/pull/1579)) +- Fix `CHAIN_AT_START` handler strategy crashing on Android when the chained Mono handler resets the signal handler and re-raises. ([#1572](https://github.com/getsentry/sentry-native/pull/1572)) ## 0.13.2 diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c index 53aa0af54c..85c721674e 100644 --- a/src/backends/sentry_backend_inproc.c +++ b/src/backends/sentry_backend_inproc.c @@ -26,6 +26,9 @@ #ifdef SENTRY_PLATFORM_UNIX # include #endif +#ifdef SENTRY_PLATFORM_ANDROID +# include +#endif #include /** @@ -1564,6 +1567,29 @@ process_ucontext(const sentry_ucontext_t *uctx) uintptr_t ip = get_instruction_pointer(uctx); uintptr_t sp = get_stack_pointer(uctx); +# ifdef SENTRY_PLATFORM_ANDROID + // Mask the signal so SA_NODEFER doesn't let re-raises from the chained + // handler kill the process before we regain control. + sigset_t mask, old_mask; + sigemptyset(&mask); + sigaddset(&mask, uctx->signum); + // Raw syscall because ART's libsigchain intercepts + // sigprocmask() and silently drops the request when called + // outside its own special handlers. Without the raw syscall + // the mask change would be ignored and SA_NODEFER would let + // the chained handler's raise() re-deliver the signal + // immediately, crashing the process before we can inspect + // the modified IP/SP. + // + // DANGER: this makes libsigchain's internal mask state + // diverge from the kernel's actual mask. If ART ever relies + // on that state for correctness (e.g. GC safepoints), this + // could cause subtle failures. We restore the mask right + // after the chained handler returns, limiting the window. + syscall( + SYS_rt_sigprocmask, SIG_BLOCK, &mask, &old_mask, sizeof(sigset_t)); +# endif + // invoke the previous handler (typically the CLR/Mono // signal-to-managed-exception handler) invoke_signal_handler( @@ -1579,6 +1605,23 @@ process_ucontext(const sentry_ucontext_t *uctx) return; } +# ifdef SENTRY_PLATFORM_ANDROID + // restore our handler + struct sigaction current; + sigaction(uctx->signum, NULL, ¤t); + if (current.sa_handler == SIG_DFL) { + sigaction(uctx->signum, &g_sigaction, NULL); + } + + // consume pending signal + struct timespec timeout = { 0, 0 }; + syscall(SYS_rt_sigtimedwait, &mask, NULL, &timeout, sizeof(sigset_t)); + + // unmask + syscall( + SYS_rt_sigprocmask, SIG_SETMASK, &old_mask, NULL, sizeof(sigset_t)); +# endif + // return from runtime handler; continue processing the crash on the // signal thread until the worker takes over } diff --git a/tests/fixtures/dotnet_signal/Directory.Build.props b/tests/fixtures/dotnet_signal/Directory.Build.props new file mode 100644 index 0000000000..cac7f5ab06 --- /dev/null +++ b/tests/fixtures/dotnet_signal/Directory.Build.props @@ -0,0 +1,2 @@ + + diff --git a/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs b/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs new file mode 100644 index 0000000000..a6b35ceba3 --- /dev/null +++ b/tests/fixtures/dotnet_signal/Platforms/Android/MainActivity.cs @@ -0,0 +1,31 @@ +using Android.App; +using Android.OS; + +// Required for "adb shell run-as" to access the app's data directory in Release builds +[assembly: Application(Debuggable = true)] + +namespace dotnet_signal; + +[Activity(Name = "dotnet_signal.MainActivity", MainLauncher = true)] +public class MainActivity : Activity +{ + protected override void OnResume() + { + base.OnResume(); + + var arg = Intent?.GetStringExtra("arg"); + if (!string.IsNullOrEmpty(arg)) + { + var databasePath = FilesDir?.AbsolutePath + "/.sentry-native"; + + // Post to the message queue so the activity finishes starting + // before the crash test runs. Without this, "am start -W" may hang. + new Handler(Looper.MainLooper!).Post(() => + { + Program.RunTest(new[] { arg }, databasePath); + FinishAndRemoveTask(); + Java.Lang.JavaSystem.Exit(0); + }); + } + } +} diff --git a/tests/fixtures/dotnet_signal/Program.cs b/tests/fixtures/dotnet_signal/Program.cs index 4e6217ac3e..c21e10ef71 100644 --- a/tests/fixtures/dotnet_signal/Program.cs +++ b/tests/fixtures/dotnet_signal/Program.cs @@ -20,10 +20,13 @@ class Program [DllImport("sentry", EntryPoint = "sentry_options_set_debug")] static extern IntPtr sentry_options_set_debug(IntPtr options, int debug); + [DllImport("sentry", EntryPoint = "sentry_options_set_database_path")] + static extern void sentry_options_set_database_path(IntPtr options, string path); + [DllImport("sentry", EntryPoint = "sentry_init")] static extern int sentry_init(IntPtr options); - static void Main(string[] args) + public static void RunTest(string[] args, string? databasePath = null) { var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS") ?? string.Empty; if (githubActions == "true") { @@ -38,10 +41,13 @@ static void Main(string[] args) var options = sentry_options_new(); sentry_options_set_handler_strategy(options, 1); sentry_options_set_debug(options, 1); + if (databasePath != null) + { + sentry_options_set_database_path(options, databasePath); + } sentry_init(options); - var doNativeCrash = args is ["native-crash"]; - if (doNativeCrash) + if (args.Contains("native-crash")) { native_crash(); } @@ -51,9 +57,9 @@ static void Main(string[] args) { Console.WriteLine("dereference a NULL object from managed code"); var s = default(string); - var c = s.Length; + var c = s!.Length; } - catch (NullReferenceException exception) + catch (NullReferenceException) { } } @@ -61,7 +67,14 @@ static void Main(string[] args) { Console.WriteLine("dereference a NULL object from managed code (unhandled)"); var s = default(string); - var c = s.Length; + var c = s!.Length; } } + +#if !ANDROID + static void Main(string[] args) + { + RunTest(args); + } +#endif } \ No newline at end of file diff --git a/tests/fixtures/dotnet_signal/test_dotnet.csproj b/tests/fixtures/dotnet_signal/test_dotnet.csproj index 238f157e2e..e266400d00 100644 --- a/tests/fixtures/dotnet_signal/test_dotnet.csproj +++ b/tests/fixtures/dotnet_signal/test_dotnet.csproj @@ -1,8 +1,23 @@ Exe - net10.0 + net10.0 + $(TargetFrameworks);net10.0-android enable enable + + + io.sentry.ndk.dotnet.signal.test + 21 + true + + + + + + + + + diff --git a/tests/test_build_static.py b/tests/test_build_static.py index 36d502c957..6dab8ca505 100644 --- a/tests/test_build_static.py +++ b/tests/test_build_static.py @@ -2,7 +2,7 @@ import sys import os import pytest -from .conditions import has_breakpad, has_crashpad, has_native +from .conditions import has_breakpad, has_crashpad, has_native, is_android def test_static_lib(cmake): @@ -16,7 +16,7 @@ def test_static_lib(cmake): ) # on linux we can use `ldd` to check that we don’t link to `libsentry.so` - if sys.platform == "linux": + if sys.platform == "linux" and not is_android: output = subprocess.check_output("ldd sentry_example", cwd=tmp_path, shell=True) assert b"libsentry.so" not in output diff --git a/tests/test_dotnet_signals.py b/tests/test_dotnet_signals.py index 7c4e2a70dd..b4bf6544da 100644 --- a/tests/test_dotnet_signals.py +++ b/tests/test_dotnet_signals.py @@ -3,10 +3,11 @@ import shutil import subprocess import sys +import time import pytest -from tests.conditions import is_tsan, is_x86, is_asan +from tests.conditions import is_android, is_tsan, is_x86, is_asan project_fixture_path = pathlib.Path("tests/fixtures/dotnet_signal") @@ -49,19 +50,23 @@ def run_dotnet(tmp_path, args): def run_dotnet_managed_exception(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "managed-exception"]) + return run_dotnet( + tmp_path, ["dotnet", "run", "-f:net10.0", "--", "managed-exception"] + ) def run_dotnet_unhandled_managed_exception(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "unhandled-managed-exception"]) + return run_dotnet( + tmp_path, ["dotnet", "run", "-f:net10.0", "--", "unhandled-managed-exception"] + ) def run_dotnet_native_crash(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run", "native-crash"]) + return run_dotnet(tmp_path, ["dotnet", "run", "-f:net10.0", "--", "native-crash"]) @pytest.mark.skipif( - sys.platform != "linux" or is_x86 or is_asan or is_tsan, + bool(sys.platform != "linux" or is_x86 or is_asan or is_tsan or is_android), reason="dotnet signal handling is currently only supported on 64-bit Linux without sanitizers", ) def test_dotnet_signals_inproc(cmake): @@ -165,7 +170,7 @@ def run_aot_native_crash(tmp_path): @pytest.mark.skipif( - sys.platform != "linux" or is_x86 or is_asan or is_tsan, + bool(sys.platform != "linux" or is_x86 or is_asan or is_tsan or is_android), reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers", ) def test_aot_signals_inproc(cmake): @@ -199,6 +204,7 @@ def test_aot_signals_inproc(cmake): [ "dotnet", "publish", + "-f:net10.0", "-p:PublishAot=true", "-p:Configuration=Release", "-o", @@ -255,3 +261,191 @@ def test_aot_signals_inproc(cmake): shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True) shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) shutil.rmtree(project_fixture_path / "obj", ignore_errors=True) + + +ANDROID_PACKAGE = "io.sentry.ndk.dotnet.signal.test" + + +def wait_for(condition, timeout=10, interval=0.5): + start = time.time() + while time.time() - start < timeout: + if condition(): + return True + time.sleep(interval) + return condition() + + +def adb(*args, **kwargs): + adb_path = "{}/platform-tools/adb".format(os.environ["ANDROID_HOME"]) + return subprocess.run([adb_path, *args], **kwargs) + + +def run_android(args=None, timeout=30): + if args is None: + args = [] + adb("logcat", "-c") + adb("shell", "pm", "clear", ANDROID_PACKAGE) + intent_args = [] + for arg in args: + intent_args += ["--es", "arg", arg] + try: + adb( + "shell", + "am", + "start", + "-W", + "-n", + "{}/dotnet_signal.MainActivity".format(ANDROID_PACKAGE), + *intent_args, + check=True, + timeout=10, + ) + except subprocess.TimeoutExpired: + pass + wait_for( + lambda: adb( + "shell", "pidof", ANDROID_PACKAGE, capture_output=True, text=True + ).returncode + != 0, + timeout=timeout, + ) + return adb("logcat", "-d", capture_output=True, text=True).stdout + + +def run_android_managed_exception(): + return run_android(["managed-exception"]) + + +def run_android_unhandled_managed_exception(): + return run_android(["unhandled-managed-exception"]) + + +def run_android_native_crash(): + return run_android(["native-crash"]) + + +@pytest.mark.skipif( + not is_android or int(is_android) < 26, + reason="needs Android API 26+ (tombstoned)", +) +def test_android_signals_inproc(cmake): + if shutil.which("dotnet") is None: + pytest.skip("dotnet is not installed") + + arch = os.environ.get("ANDROID_ARCH", "x86_64") + rid_map = { + "x86_64": "android-x64", + "x86": "android-x86", + "arm64-v8a": "android-arm64", + "armeabi-v7a": "android-arm", + } + + try: + tmp_path = cmake( + ["sentry"], + {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"}, + ) + + # build libcrash.so with NDK clang + ndk_prebuilt = pathlib.Path( + "{}/ndk/{}/toolchains/llvm/prebuilt".format( + os.environ["ANDROID_HOME"], os.environ["ANDROID_NDK"] + ) + ) + triples = { + "x86_64": "x86_64-linux-android", + "x86": "i686-linux-android", + "arm64-v8a": "aarch64-linux-android", + "armeabi-v7a": "armv7a-linux-androideabi", + } + ndk_clang = str( + next(ndk_prebuilt.iterdir()) + / "bin" + / "{}{}-clang".format(triples[arch], os.environ["ANDROID_API"]) + ) + native_lib_dir = project_fixture_path / "native" / arch + native_lib_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(tmp_path / "libsentry.so", native_lib_dir / "libsentry.so") + subprocess.run( + [ + ndk_clang, + "-Wall", + "-Wextra", + "-fPIC", + "-shared", + str(project_fixture_path / "crash.c"), + "-o", + str(native_lib_dir / "libcrash.so"), + ], + check=True, + ) + + # build and install the APK + subprocess.run( + [ + "dotnet", + "build", + "-f:net10.0-android", + "-p:RuntimeIdentifier={}".format(rid_map[arch]), + "-p:Configuration=Release", + ], + cwd=project_fixture_path, + check=True, + ) + apk_dir = ( + project_fixture_path / "bin" / "Release" / "net10.0-android" / rid_map[arch] + ) + apk_path = next(apk_dir.glob("*-Signed.apk")) + adb("install", "-r", str(apk_path), check=True) + + def run_as(cmd, **kwargs): + return adb( + "shell", + 'run-as {} sh -c "{}"'.format(ANDROID_PACKAGE, cmd), + **kwargs, + ) + + db = "files/.sentry-native" + + def file_exists(path): + return run_as("test -f " + path, capture_output=True).returncode == 0 + + def dir_exists(path): + return run_as("test -d " + path, capture_output=True).returncode == 0 + + def has_envelope(): + result = run_as( + "find " + db + " -name '*.envelope'", capture_output=True, text=True + ) + return bool(result.stdout.strip()) + + # managed exception: handled, no crash + logcat = run_android_managed_exception() + assert not ( + "NullReferenceException" in logcat + ), f"Managed exception leaked.\nlogcat:\n{logcat}" + assert wait_for(lambda: dir_exists(db)), "No database-path exists" + assert not file_exists(db + "/last_crash"), "A crash was registered" + assert not has_envelope(), "Unexpected envelope found" + + # unhandled managed exception: Mono calls exit(1), the native SDK + # should not register a crash (sentry-dotnet handles this at the + # managed layer via UnhandledExceptionRaiser) + logcat = run_android_unhandled_managed_exception() + assert ( + "NullReferenceException" in logcat + ), f"Expected NullReferenceException.\nlogcat:\n{logcat}" + assert wait_for(lambda: dir_exists(db)), "No database-path exists" + assert not file_exists(db + "/last_crash"), "A crash was registered" + assert not has_envelope(), "Unexpected envelope found" + + # native crash + run_android_native_crash() + assert wait_for(lambda: file_exists(db + "/last_crash")), "Crash marker missing" + assert wait_for(has_envelope), "Crash envelope is missing" + + finally: + shutil.rmtree(project_fixture_path / "native", ignore_errors=True) + shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) + shutil.rmtree(project_fixture_path / "obj", ignore_errors=True) + adb("uninstall", ANDROID_PACKAGE, check=False)