diff --git a/src/core/src/aio.test.cpp b/src/core/src/aio.test.cpp index 6c00e9165..6c5fcb0c9 100644 --- a/src/core/src/aio.test.cpp +++ b/src/core/src/aio.test.cpp @@ -253,3 +253,46 @@ TEST(core, operation_failed) EXPECT_TRUE(utils::filesystem::remove_path(tmp_file)); } + +// Pins the zero-length aio contract that the NFS file-copy plugin depends on: +// a zero-length write/read completes with ERR_HANDLE_EOF (not ERR_OK) and count 0. +// Because of this, nfs_service_impl::on_copy / nfs_client_impl::continue_write must +// special-case empty (size==0) files instead of issuing a zero-length read/write, +// which would otherwise be reported to the copy caller as a failure. +TEST(core, aio_zero_length) +{ + // if in dsn_mimic_app() and disk_io_mode == IOE_PER_QUEUE + if (task::get_current_disk() == nullptr) return; + + ASSERT_TRUE(::dsn::utils::test::prepare_test_tmp_dir("dsn.core.aio_zero")); + const std::string tmp_file = + ::dsn::utils::test::test_tmp_path("dsn.core.aio_zero", "zero_test_file"); + + ::dsn::error_code werr; + size_t wcount = 0xdead; + ::dsn::error_code rerr; + size_t rcount = 0xdead; + char buffer[8] = {}; + + // zero-length write to a freshly created file + auto fp = dsn_file_open(tmp_file.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666); + ASSERT_TRUE(fp != nullptr); + auto t = ::dsn::file::write(fp, buffer, 0, 0, LPC_AIO_TEST, nullptr, + [&werr, &wcount](::dsn::error_code e, size_t n) { werr = e; wcount = n; }, 0); + t->wait(); + EXPECT_TRUE(werr == ERR_HANDLE_EOF); + EXPECT_EQ(0u, wcount); + dsn_file_close(fp); + + // zero-length read from the (now empty) file + auto fp2 = dsn_file_open(tmp_file.c_str(), O_RDONLY | O_BINARY, 0); + ASSERT_TRUE(fp2 != nullptr); + t = ::dsn::file::read(fp2, buffer, 0, 0, LPC_AIO_TEST, nullptr, + [&rerr, &rcount](::dsn::error_code e, size_t n) { rerr = e; rcount = n; }, 0); + t->wait(); + EXPECT_TRUE(rerr == ERR_HANDLE_EOF); + EXPECT_EQ(0u, rcount); + dsn_file_close(fp2); + + EXPECT_TRUE(utils::filesystem::remove_path(tmp_file)); +} diff --git a/src/core/src/disk_engine.cpp b/src/core/src/disk_engine.cpp index 9b3bf0ef5..4f0e5f752 100644 --- a/src/core/src/disk_engine.cpp +++ b/src/core/src/disk_engine.cpp @@ -147,14 +147,25 @@ aio_task* disk_file::on_write_completed(aio_task* wk, void* ctx, error_code err, if (err == ERR_OK) { size_t this_size = (size_t)wk->aio()->buffer_size; - dassert(size >= this_size, - "written buffer size does not equal to input buffer's size: %d vs %d", - (int)size, - (int)this_size - ); - - wk->enqueue(err, this_size); - size -= this_size; + if (size < this_size) + { + // Short write from the underlying provider (e.g. the disk is full): + // fewer bytes were written than this task requested. Report a + // file-operation failure for this task and the remaining batched + // tasks (whose data was not written either) through the aio callback + // channel instead of aborting the whole process. + derror("disk write completed with fewer bytes than requested: %d vs %d", + (int)size, + (int)this_size + ); + wk->enqueue(ERR_FILE_OPERATION_FAILED, size); + size = 0; + } + else + { + wk->enqueue(err, this_size); + size -= this_size; + } } else { diff --git a/src/dev/cpp/file_utils.cpp b/src/dev/cpp/file_utils.cpp index 23ce9135b..03a89ad0a 100644 --- a/src/dev/cpp/file_utils.cpp +++ b/src/dev/cpp/file_utils.cpp @@ -542,8 +542,18 @@ namespace dsn { { bool succ; - dassert((typeflag == FTW_F) || (typeflag == FTW_DP), - "Invalid typeflag = %d.", typeflag); + // nftw reports FTW_NS when stat() fails on an entry (a transient + // I/O error, a permission problem, or a concurrent removal) and + // FTW_DNR for an unreadable directory. These are recoverable, so + // stop the walk and let remove_directory()/remove_path() return + // false through their existing bool channel instead of aborting + // the whole process. + if ((typeflag != FTW_F) && (typeflag != FTW_DP)) + { + dwarn("remove path %s failed: unexpected file tree walk typeflag = %d", + fpath, typeflag); + return FTW_STOP; + } #ifdef _WIN32 if (typeflag != FTW_F) { diff --git a/src/plugins/tools.common/native_aio_provider.linux.cpp b/src/plugins/tools.common/native_aio_provider.linux.cpp index a1c7846dc..15f415e1e 100644 --- a/src/plugins/tools.common/native_aio_provider.linux.cpp +++ b/src/plugins/tools.common/native_aio_provider.linux.cpp @@ -59,7 +59,12 @@ namespace dsn { native_linux_aio_provider::~native_linux_aio_provider() { auto ret = io_destroy(_ctx); - dassert(ret == 0, "io_destroy error, ret = %d", ret); + if (ret != 0) + { + // A destructor must not abort. io_destroy can fail (e.g. EINVAL on an + // already-released context); log and continue so shutdown proceeds. + derror("io_destroy error, ret = %d", ret); + } } void native_linux_aio_provider::start(io_modifer& ctx) diff --git a/src/plugins/tools.nfs/nfs_client_impl.cpp b/src/plugins/tools.nfs/nfs_client_impl.cpp index e29bf6471..6722e1e2e 100644 --- a/src/plugins/tools.nfs/nfs_client_impl.cpp +++ b/src/plugins/tools.nfs/nfs_client_impl.cpp @@ -93,6 +93,22 @@ namespace dsn { return; } + // size_list and file_list are parallel arrays in the (remote, untrusted) + // response, but the loop below iterates size_list while indexing file_list. + // A malformed or corrupted response with mismatched lengths would otherwise + // read file_list out of bounds. Validate the lengths match and fail the + // request through the existing nfs_task error channel. + if (resp.size_list.size() != resp.file_list.size()) + { + derror("invalid get file size response: size_list (%d) and file_list (%d) " + "have mismatched lengths", + (int)resp.size_list.size(), + (int)resp.file_list.size()); + ureq->nfs_task->enqueue(ERR_INVALID_DATA, 0); + delete ureq; + return; + } + for (size_t i = 0; i < resp.size_list.size(); i++) // file list { file_context *filec; @@ -234,7 +250,28 @@ namespace dsn { handle_completion(reqc->file_ctx->user_req, err); return; } - + + // response.size and response.file_content are independent fields filled in by the + // (untrusted) remote server. The local write issues + // file::write(response.file_content.data(), response.size, ...), so a response whose + // declared size is negative or larger than the actual file_content buffer would make + // that write read past the end of the buffer -- corrupting the destination file with + // adjacent heap memory (and disclosing it into the replicated data) or crashing. This + // is the client-side counterpart of the size validation on_copy already performs on + // the request. A zero size is legal (empty-file segment) and is handled specially in + // continue_write. On the honest path file_content is the full block buffer and size is + // the valid prefix, so size <= file_content.length() always holds. + if (resp.size < 0 || static_cast(resp.size) > resp.file_content.length()) + { + derror("nfs: invalid copy response for file %s: declared size %d exceeds " + "content buffer length %u", + reqc->file_ctx->file_name.c_str(), + resp.size, + (uint32_t)resp.file_content.length()); + handle_completion(reqc->file_ctx->user_req, ERR_INVALID_DATA); + return; + } + reqc->response = resp; reqc->response.error.end_tracking(); // always ERR_OK reqc->is_ready_for_write = true; @@ -279,82 +316,121 @@ namespace dsn { return; } - // get write - dsn::ref_ptr reqc; + // Loop rather than recurse on per-file failures: a persistent error + // (e.g. a full disk that fails every transfer) would otherwise make + // continue_write() invoke itself once per failed request and could + // grow the call stack without bound. while (true) { + // get write + dsn::ref_ptr reqc; + while (true) { - zauto_lock l(_local_writes_lock); - if (!_local_writes.empty()) { - reqc = _local_writes.front(); - _local_writes.pop(); + zauto_lock l(_local_writes_lock); + if (!_local_writes.empty()) + { + reqc = _local_writes.front(); + _local_writes.pop(); + } + else + { + reqc = nullptr; + break; + } } - else + { - reqc = nullptr; - break; - } + zauto_lock l(reqc->lock); + if (reqc->is_valid) + break; + } } + if (nullptr == reqc) { - zauto_lock l(reqc->lock); - if (reqc->is_valid) - break; + --_concurrent_local_write_count; + return; } - } - if (nullptr == reqc) - { - --_concurrent_local_write_count; - return; - } + // real write + std::string file_path = dsn::utils::filesystem::path_combine(reqc->copy_req.dst_dir, reqc->file_ctx->file_name); + std::string path = dsn::utils::filesystem::remove_file_name(file_path.c_str()); + if (!dsn::utils::filesystem::create_directory(path)) + { + derror("create directory %s failed", path.c_str()); + error_code err = ERR_FILE_OPERATION_FAILED; + handle_completion(reqc->file_ctx->user_req, err); + continue; + } - // real write - std::string file_path = dsn::utils::filesystem::path_combine(reqc->copy_req.dst_dir, reqc->file_ctx->file_name); - std::string path = dsn::utils::filesystem::remove_file_name(file_path.c_str()); - if (!dsn::utils::filesystem::create_directory(path)) - { - dassert(false, "Fail to create directory %s.", path.c_str()); - } + dsn_handle_t hfile = reqc->file_ctx->file.load(); + if (!hfile) + { + zauto_lock l(reqc->file_ctx->user_req->user_req_lock); + hfile = reqc->file_ctx->file.load(); + if (!hfile) + { + hfile = dsn_file_open(file_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666); + reqc->file_ctx->file = hfile; + } + } - dsn_handle_t hfile = reqc->file_ctx->file.load(); - if (!hfile) - { - zauto_lock l(reqc->file_ctx->user_req->user_req_lock); - hfile = reqc->file_ctx->file.load(); if (!hfile) { - hfile = dsn_file_open(file_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666); - reqc->file_ctx->file = hfile; + derror("file open %s failed", file_path.c_str()); + error_code err = ERR_FILE_OPERATION_FAILED; + handle_completion(reqc->file_ctx->user_req, err); + continue; } - } - if (!hfile) - { - derror("file open %s failed", file_path.c_str()); - error_code err = ERR_FILE_OPERATION_FAILED; - handle_completion(reqc->file_ctx->user_req, err); - --_concurrent_local_write_count; - continue_write(); - return; - } + // An empty source file yields a copy request with response.size == 0. The + // destination file has just been created (or already exists) via the + // O_RDWR | O_CREAT open above, so there is nothing to write: a zero-length + // file::write would complete with ERR_HANDLE_EOF and be reported as a + // failure. Finalize this segment as a success inline -- mirroring the + // success bookkeeping in local_write_callback -- and continue the loop + // (no recursion) instead of issuing the zero-length write. + if (reqc->response.size == 0) + { + reqc->response.file_content = blob(); - { - zauto_lock l(reqc->lock); - auto& reqc_save = *reqc.get(); - reqc_save.local_write_task = file::write( - hfile, - reqc_save.response.file_content.data(), - reqc_save.response.size, - reqc_save.response.offset, - LPC_NFS_WRITE, - this, - [this, reqc_cap = std::move(reqc)] (error_code err, int sz) + bool completed = false; { - local_write_callback(err, sz, std::move(reqc_cap)); + zauto_lock l(reqc->file_ctx->user_req->user_req_lock); + if (++reqc->file_ctx->finished_segments == (int)reqc->file_ctx->copy_requests.size()) + { + if (++reqc->file_ctx->user_req->finished_files == (int)reqc->file_ctx->user_req->file_context_map.size()) + { + completed = true; + } + } + } + + if (completed) + { + handle_completion(reqc->file_ctx->user_req, ERR_OK); } - ); + continue; + } + + { + zauto_lock l(reqc->lock); + auto& reqc_save = *reqc.get(); + reqc_save.local_write_task = file::write( + hfile, + reqc_save.response.file_content.data(), + reqc_save.response.size, + reqc_save.response.offset, + LPC_NFS_WRITE, + this, + [this, reqc_cap = std::move(reqc)] (error_code err, int sz) + { + local_write_callback(err, sz, std::move(reqc_cap)); + } + ); + } + return; } } @@ -443,7 +519,10 @@ namespace dsn { if (f.second->file) { auto err2 = dsn_file_close(f.second->file); - dassert(err2 == ERR_OK, "dsn_file_close failed, err = %s", dsn_error_to_string(err2)); + if (err2 != ERR_OK) + { + dwarn("dsn_file_close failed, err = %s", dsn_error_to_string(err2)); + } f.second->file = nullptr; diff --git a/src/plugins/tools.nfs/nfs_server_impl.cpp b/src/plugins/tools.nfs/nfs_server_impl.cpp index 9aef2a41f..7beb3309b 100644 --- a/src/plugins/tools.nfs/nfs_server_impl.cpp +++ b/src/plugins/tools.nfs/nfs_server_impl.cpp @@ -44,11 +44,14 @@ namespace dsn { //dinfo(">>> on call RPC_COPY end, exec RPC_NFS_COPY"); // request.size is supplied by the (untrusted) client but the read below targets a - // buffer of exactly nfs_copy_block_bytes. Reject out-of-range sizes to avoid a heap - // buffer overflow when reading the file content into the fixed-size block buffer. - if (request.size <= 0 || static_cast(request.size) > _opts.nfs_copy_block_bytes) + // buffer of exactly nfs_copy_block_bytes. Reject negative or oversized lengths to + // avoid a heap buffer overflow when reading the file content into the fixed-size + // block buffer. A zero length is legal and must be preserved: an empty source file + // produces a single zero-byte copy request, which has to succeed so the destination + // empty file still gets created (otherwise one empty file fails the whole transfer). + if (request.size < 0 || static_cast(request.size) > _opts.nfs_copy_block_bytes) { - derror("nfs: invalid copy request size %d (allowed range is (0, %u]) for file %s", + derror("nfs: invalid copy request size %d (allowed range is [0, %u]) for file %s", request.size, _opts.nfs_copy_block_bytes, request.file_name.c_str()); ::dsn::service::copy_response resp; resp.error = ERR_INVALID_PARAMETERS; @@ -56,6 +59,22 @@ namespace dsn { return; } + // An empty source file produces exactly one zero-byte copy request. A + // zero-length file::read completes with ERR_HANDLE_EOF (not ERR_OK), which the + // client treats as a copy failure, so the destination empty file would never + // be created and one empty file would fail the whole transfer. There is no + // content to ship, so reply success directly without opening or reading the + // file; the client creates the empty destination on its own O_CREAT open. + if (request.size == 0) + { + ::dsn::service::copy_response resp; + resp.error = ERR_OK; + resp.offset = request.offset; + resp.size = 0; + reply(resp); + return; + } + std::string file_path = dsn::utils::filesystem::path_combine(request.source_dir, request.file_name); dsn_handle_t hfile; @@ -134,6 +153,21 @@ namespace dsn { } } + // A short read (ERR_OK but fewer bytes than requested) means the source file + // no longer holds the full requested range -- it was truncated, raced with a + // writer, or is corrupt. cp.bb is allocated with make_shared_array, i.e. + // uninitialized heap, and is sent in full with resp.size = cp.size (the + // requested size). Reporting success here would make the client write cp.size + // bytes whose tail [sz, cp.size) was never filled by the read, corrupting the + // destination file and disclosing uninitialized heap memory into the replicated + // data. Fail this copy block through the existing response error channel instead. + if (err == ERR_OK && sz != cp.size) + { + derror("nfs: short read on file %s at offset %" PRId64 ": read %u of %u bytes", + cp.file_path.c_str(), cp.offset, (uint32_t)sz, (uint32_t)cp.size); + err = ERR_FILE_OPERATION_FAILED; + } + ::dsn::service::copy_response resp; resp.error = err; resp.file_content = cp.bb; @@ -173,7 +207,23 @@ namespace dsn { int64_t sz; if (!dsn::utils::filesystem::file_size(fpath, sz)) { - dassert(false, "Fail to get file size of %s.", fpath.c_str()); + derror("get file size of %s failed", fpath.c_str()); + err = ERR_FILE_OPERATION_FAILED; + break; + } + + // fpath is built from the normalized source folder, while the + // strip length below uses the raw (untrusted) request.source_dir. + // A non-normalized source_dir (e.g. redundant or trailing '/') + // from the peer can be longer than the normalized fpath, which + // would make substr() throw std::out_of_range and abort the + // server. Reject such a request instead of crashing. + if (request.source_dir.length() > fpath.length()) + { + derror("invalid source_dir(%s) does not prefix file(%s)", + request.source_dir.c_str(), fpath.c_str()); + err = ERR_INVALID_PARAMETERS; + break; } resp.size_list.push_back((uint64_t)sz); @@ -201,6 +251,18 @@ namespace dsn { // Done uint64_t size = st.st_size; + // path_combine normalizes file_path, which may be shorter than the + // raw (untrusted) request.source_dir; using source_dir.length() as the + // substr position would throw std::out_of_range and abort the server on + // a non-normalized source_dir from the peer. Reject it instead. + if (request.source_dir.length() > file_path.length()) + { + derror("invalid source_dir(%s) does not prefix file(%s)", + request.source_dir.c_str(), file_path.c_str()); + err = ERR_INVALID_PARAMETERS; + break; + } + resp.size_list.push_back(size); resp.file_list.push_back(file_path.substr(request.source_dir.length())); } @@ -222,11 +284,18 @@ namespace dsn { if (fptr->file_access_count == 0 && dsn_now_ms() - fptr->last_access_time > (uint64_t)_opts.file_close_expire_time_ms) { - dinfo("nfs: close file handle %s", it->first.c_str()); + std::string file_name = it->first; + dinfo("nfs: close file handle %s", file_name.c_str()); it = _handles_map.erase(it); ::dsn::error_code err = dsn_file_close(fptr->file_handle); - dassert(err == ERR_OK, "dsn_file_close failed, err = %s", err.to_string()); + if (err != ERR_OK) + { + dwarn("nfs: close file handle %s failed, err = %s", + file_name.c_str(), + err.to_string()); + } + delete fptr; } else diff --git a/src/plugins_ext/rDSN.dist.service b/src/plugins_ext/rDSN.dist.service index 82ec2cd5b..95694f062 160000 --- a/src/plugins_ext/rDSN.dist.service +++ b/src/plugins_ext/rDSN.dist.service @@ -1 +1 @@ -Subproject commit 82ec2cd5b43120837f438edee286371ae0bb94c0 +Subproject commit 95694f062a1ea03aba5368780265ca8ace53806a