From abd2b9c01cb1b5f8c801fd00936e5b566efd8e98 Mon Sep 17 00:00:00 2001 From: RobertLD Date: Thu, 16 Apr 2026 08:48:45 -0400 Subject: [PATCH 1/2] #33823 Improve err msgs when opening files that are the wrong format --- cpp/src/arrow/ipc/message.cc | 12 ++++++++++++ cpp/src/arrow/ipc/reader.cc | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index 84ee62fe9e8..1bcf20933ea 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -565,6 +565,18 @@ Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) { auto metadata_length = decoder->next_required_size(); ARROW_ASSIGN_OR_RAISE(auto metadata, file->Read(metadata_length)); if (metadata->size() != metadata_length) { + // The first sizeof(int32_t) bytes of the Arrow file magic ("ARRO") may have been + // misread as metadata_length. Check if the remaining bytes complete the magic. + const auto remaining_magic = internal::kArrowMagicBytes.substr(sizeof(int32_t)); + if (metadata->size() >= static_cast(remaining_magic.size()) && + std::string_view(reinterpret_cast(metadata->data()), + remaining_magic.size()) == remaining_magic) { + return Status::Invalid( + "Expected to read ", metadata_length, " metadata bytes, but only read ", + metadata->size(), + ". This appears to be an Arrow IPC File format file. " + "Try open_file() instead of open_stream()."); + } return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ", "only read ", metadata->size()); } diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 58008138430..317e57ba16c 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1890,7 +1890,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { const auto magic_start = buffer->data() + sizeof(int32_t); if (std::string_view(reinterpret_cast(magic_start), kMagicSize) != kArrowMagicBytes) { - return Status::Invalid("Not an Arrow file"); + return Status::Invalid("Not an Arrow file. If this is an Arrow IPC Streaming format file, try open_stream() instead."); } int32_t footer_length = bit_util::FromLittleEndian( From db7dd635e3376d0996439f941b4ad154bd4708ca Mon Sep 17 00:00:00 2001 From: RobertLD Date: Thu, 16 Apr 2026 08:52:41 -0400 Subject: [PATCH 2/2] 33823 Formatting + linting --- cpp/src/arrow/ipc/message.cc | 9 ++++----- cpp/src/arrow/ipc/reader.cc | 4 +++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index 1bcf20933ea..285c7b1e567 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -571,11 +571,10 @@ Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) { if (metadata->size() >= static_cast(remaining_magic.size()) && std::string_view(reinterpret_cast(metadata->data()), remaining_magic.size()) == remaining_magic) { - return Status::Invalid( - "Expected to read ", metadata_length, " metadata bytes, but only read ", - metadata->size(), - ". This appears to be an Arrow IPC File format file. " - "Try open_file() instead of open_stream()."); + return Status::Invalid("Expected to read ", metadata_length, + " metadata bytes, but only read ", metadata->size(), + ". This appears to be an Arrow IPC File format file. " + "Try open_file() instead of open_stream()."); } return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ", "only read ", metadata->size()); diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 317e57ba16c..ead4e50672a 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -1890,7 +1890,9 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { const auto magic_start = buffer->data() + sizeof(int32_t); if (std::string_view(reinterpret_cast(magic_start), kMagicSize) != kArrowMagicBytes) { - return Status::Invalid("Not an Arrow file. If this is an Arrow IPC Streaming format file, try open_stream() instead."); + return Status::Invalid( + "Not an Arrow file. If this is an Arrow IPC Streaming format file, try " + "open_stream() instead."); } int32_t footer_length = bit_util::FromLittleEndian(