Issue
When allocating a page for an inode, if a crash occurs the alter pages are not persisted and the inode's checksum is updated, a segment fault will occur since alter_log_entry computes the alter log address via the address to the later page.
Reason
Allocating a new page for a newly created file has such processes:
- allocate a page for the main log, update inode (Line 1216)
- allocate a page for the alternative log, update inode (Line 1221)
- set the alter page address for both main and alter pages, flush them but no fence. (Line 1226)
- update the inode checksum (Line 1232)
- fence after updating the inode checksum (Within the function at Line 1232)
|
if (curr_p == 0) { |
|
ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG); |
|
if (ret) |
|
return 0; |
|
|
|
if (metadata_csum) { |
|
ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG); |
|
if (ret) |
|
return 0; |
|
|
|
nova_memunlock_inode(sb, pi, &irq_flags); |
|
nova_update_alter_pages(sb, pi, sih->log_head, |
|
sih->alter_log_head); |
|
nova_memlock_inode(sb, pi, &irq_flags); |
|
} |
|
|
|
nova_memunlock_inode(sb, pi, &irq_flags); |
|
nova_update_inode_checksum(pi); |
|
nova_memlock_inode(sb, pi, &irq_flags); |
|
|
|
return sih->log_head; |
Since there are no fences before updating the checksum, Step 3 could be issued after Step 4. If a crash occurs, after recovery, this issue will cause segment fault. In th nova_verify_entry_csum function, NOVA reads the alter log by the alter page address (Line 328), leading to the bug (Line 335).
|
entry_off = nova_get_addr_off(sbi, entry); |
|
alter_off = alter_log_entry(sb, entry_off); |
|
if (alter_off == 0) { |
|
nova_err(sb, "%s: log page tail error detected\n", __func__); |
|
goto fail; |
|
} |
|
|
|
alter = (void *) nova_get_block(sb, alter_off); |
|
ret = nova_get_entry_copy(sb, alter, &alter_csum, &alter_size, |
|
alter_copy); |
Reproduce
if (curr_p == 0) {
ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG);
if (ret)
return 0;
if (metadata_csum) {
ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG);
if (ret)
return 0;
nova_memunlock_inode(sb, pi, &irq_flags);
if (pi->nova_ino == 33) {
// simulate the out-of-order execution.
nova_dbg("-->> %s: skip set alter page. ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
} else {
nova_update_alter_pages(sb, pi, sih->log_head,
sih->alter_log_head);
}
nova_memlock_inode(sb, pi, &irq_flags);
}
// info where we are
nova_dbg("-->> %s: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
nova_memunlock_inode(sb, pi, &irq_flags);
nova_update_inode_checksum(pi);
if (pi->nova_ino == 33) {
// simulate a crash after updating checksum
nova_dbg("-->> %s: bug on: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
BUG();
}
nova_memlock_inode(sb, pi, &irq_flags);
return sih->log_head;
}
The command to reproduce the bug
# zero out the PM device
dd if=/dev/zero of=/dev/pmem0 bs=1048576 count=100
# make the modified code
make
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o init,data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# do not use touch to create a file, since touch will call two functions, one for creating, another for updating attributes (this will allocate pages and append a log entry)
> /mnt/pmem0/foo
# execute any command to trigger the page allocation
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direct
# The inserted bug will be triggered here, cat the image for recovery
cat /dev/pmem0 > image
#### reboot the system
# revert the source code modification and then make
make
# restore the image
cat image > /dev/pmem0
# recover it
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# stat can show the file is good
# execute command to trigger the `nova_verify_entry_csum` function
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direc
# bug occurs, syslog will show the FS stoped at `nova_get_entry_copy`
Fix
Both flushing and fencing when setting the alter address of log pages.
Issue
When allocating a page for an inode, if a crash occurs the alter pages are not persisted and the inode's checksum is updated, a segment fault will occur since
alter_log_entrycomputes the alter log address via the address to the later page.Reason
Allocating a new page for a newly created file has such processes:
linux-nova/fs/nova/log.c
Lines 1215 to 1235 in 976a4d1
Since there are no fences before updating the checksum, Step 3 could be issued after Step 4. If a crash occurs, after recovery, this issue will cause segment fault. In th
nova_verify_entry_csumfunction, NOVA reads the alter log by the alter page address (Line 328), leading to the bug (Line 335).linux-nova/fs/nova/checksum.c
Lines 327 to 336 in 976a4d1
Reproduce
The command to reproduce the bug
Fix
Both flushing and fencing when setting the alter address of log pages.