diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index 84ee62fe9e8..285c7b1e567 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -565,6 +565,17 @@ Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) {
   auto metadata_length = decoder->next_required_size();
   ARROW_ASSIGN_OR_RAISE(auto metadata, file->Read(metadata_length));
   if (metadata->size() != metadata_length) {
+    // The first sizeof(int32_t) bytes of the Arrow file magic ("ARRO") may have been
+    // misread as metadata_length. Check if the remaining bytes complete the magic.
+    const auto remaining_magic = internal::kArrowMagicBytes.substr(sizeof(int32_t));
+    if (metadata->size() >= static_cast<int64_t>(remaining_magic.size()) &&
+        std::string_view(reinterpret_cast<const char*>(metadata->data()),
+                         remaining_magic.size()) == remaining_magic) {
+      return Status::Invalid("Expected to read ", metadata_length,
+                             " metadata bytes, but only read ", metadata->size(),
+                             ". This appears to be an Arrow IPC File format file. "
+                             "Try open_file() instead of open_stream().");
+    }
     return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ",
                            "only read ", metadata->size());
   }
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 58008138430..ead4e50672a 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1890,7 +1890,9 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
     const auto magic_start = buffer->data() + sizeof(int32_t);
     if (std::string_view(reinterpret_cast<const char*>(magic_start), kMagicSize) !=
         kArrowMagicBytes) {
-      return Status::Invalid("Not an Arrow file");
+      return Status::Invalid(
+          "Not an Arrow file. If this is an Arrow IPC Streaming format file, try "
+          "open_stream() instead.");
     }
 
     int32_t footer_length = bit_util::FromLittleEndian(