Upstream FS driver fixes (#31)

* copy upstream jbd2

* c223fc8664
This commit is contained in:
Willem Melching
2021-08-21 02:14:23 +02:00
parent 7921c03adf
commit 138cd3cfdd
5 changed files with 141 additions and 88 deletions

View File

@@ -207,6 +207,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
struct buffer_head *head;
struct page *page;
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -234,15 +235,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
* file io on the block device and getblk. It gets dealt with
* elsewhere, don't buffer_error if we had some unmapped buffers
*/
if (all_mapped) {
printk("__find_get_block_slow() failed. "
"block=%llu, b_blocknr=%llu\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr);
printk("b_state=0x%08lx, b_size=%zu\n",
bh->b_state, bh->b_size);
printk("device %pg blocksize: %d\n", bdev,
1 << bd_inode->i_blkbits);
ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
if (all_mapped && __ratelimit(&last_warned)) {
printk("__find_get_block_slow() failed. block=%llu, "
"b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
"device %pg blocksize: %d\n",
(unsigned long long)block,
(unsigned long long)bh->b_blocknr,
bh->b_state, bh->b_size, bdev,
1 << bd_inode->i_blkbits);
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
@@ -2788,16 +2789,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
#if 0
/* Not really sure about this - do we need this ? */
if (page->mapping->a_ops->invalidatepage)
page->mapping->a_ops->invalidatepage(page, offset);
#endif
unlock_page(page);
return 0; /* don't care */
}
@@ -2992,12 +2983,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
@@ -3076,6 +3061,13 @@ void guard_bio_eod(int op, struct bio *bio)
/* Uhhuh. We've got a bio that straddles the device size! */
truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
/*
* The bio contains more than one segment which spans EOD, just return
* and let IO layer turn it into an EIO
*/
if (truncated_bytes > bvec->bv_len)
return;
/* Truncate the bio.. */
bio->bi_iter.bi_size -= truncated_bytes;
bvec->bv_len -= truncated_bytes;
@@ -3231,6 +3223,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
/*
* The bh should be mapped, but it might not be if the
* device was hot-removed. Not much we can do but fail the I/O.
*/
if (!buffer_mapped(bh)) {
unlock_buffer(bh);
return -EIO;
}
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);

View File

@@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
"journal space in %s\n", __func__,
journal->j_devname);
WARN_ON(1);
jbd2_journal_abort(journal, 0);
jbd2_journal_abort(journal, -EIO);
}
write_lock(&journal->j_state_lock);
} else {
@@ -254,8 +254,8 @@ restart:
bh = jh2bh(jh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
get_bh(bh);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
@@ -336,8 +336,8 @@ restart2:
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
get_bh(bh);
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");

View File

@@ -700,9 +700,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
if (descriptor)
jbd2_descriptor_block_csum_set(journal,
descriptor);
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
/*
@@ -720,7 +722,6 @@ start_journal_io:
submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
}
cond_resched();
stats.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next
time round the loop. */
@@ -778,7 +779,7 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
jbd2_journal_abort(journal, err);
}
blk_finish_plug(&plug);
@@ -807,6 +808,7 @@ start_journal_io:
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
jbd2_unfile_log_bh(bh);
stats.run.rs_blocks_logged++;
/*
* The list contains temporary buffer heads created by
@@ -852,6 +854,7 @@ start_journal_io:
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
clear_buffer_jwrite(bh);
jbd2_unfile_log_bh(bh);
stats.run.rs_blocks_logged++;
__brelse(bh); /* One for getblk */
/* AKPM: bforget here */
}
@@ -869,10 +872,11 @@ start_journal_io:
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
jbd2_journal_abort(journal, err);
}
if (cbh)
err = journal_wait_on_commit_record(journal, cbh);
stats.run.rs_blocks_logged++;
if (jbd2_has_feature_async_commit(journal) &&
journal->j_flags & JBD2_BARRIER) {
blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
@@ -965,29 +969,34 @@ restart_loop:
* it. */
/*
* A buffer which has been freed while still being journaled by
* a previous transaction.
*/
if (buffer_freed(bh)) {
* A buffer which has been freed while still being journaled
* by a previous transaction, refile the buffer to BJ_Forget of
* the running transaction. If the just committed transaction
* contains "add to orphan" operation, we can completely
* invalidate the buffer now. We are rather through in that
* since the buffer may be still accessible when blocksize <
* pagesize and it is attached to the last partial page.
*/
if (buffer_freed(bh) && !jh->b_next_transaction) {
struct address_space *mapping;
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
/*
* If the running transaction is the one containing
* "add to orphan" operation (b_next_transaction !=
* NULL), we have to wait for that transaction to
* commit before we can really get rid of the buffer.
* So just clear b_modified to not confuse transaction
* credit accounting and refile the buffer to
* BJ_Forget of the running transaction. If the just
* committed transaction contains "add to orphan"
* operation, we can completely invalidate the buffer
* now. We are rather through in that since the
* buffer may be still accessible when blocksize <
* pagesize and it is attached to the last partial
* page.
* Block device buffers need to stay mapped all the
* time, so it is enough to clear buffer_jbddirty and
* buffer_freed bits. For the file mapping buffers (i.e.
* journalled data) we need to unmap buffer and clear
* more bits. We also need to be careful about the check
* because the data page mapping can get cleared under
* our hands. Note that if mapping == NULL, we don't
* need to make buffer unmapped because the page is
* already detached from the mapping and buffers cannot
* get reused.
*/
jh->b_modified = 0;
if (!jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
mapping = READ_ONCE(bh->b_page->mapping);
if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_req(bh);

View File

@@ -1339,6 +1339,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
journal_superblock_t *sb = journal->j_superblock;
int ret;
/* Buffer got discarded which means block device got invalidated */
if (!buffer_mapped(bh))
return -EIO;
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
@@ -1666,6 +1670,11 @@ int jbd2_journal_load(journal_t *journal)
journal->j_devname);
return -EFSCORRUPTED;
}
/*
* clear JBD2_ABORT flag initialized in journal_init_common
* here to update log tail information with the newest seq.
*/
journal->j_flags &= ~JBD2_ABORT;
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
@@ -1673,7 +1682,6 @@ int jbd2_journal_load(journal_t *journal)
if (journal_reset(journal))
goto recovery_error;
journal->j_flags &= ~JBD2_ABORT;
journal->j_flags |= JBD2_LOADED;
return 0;
@@ -2092,12 +2100,10 @@ static void __journal_abort_soft (journal_t *journal, int errno)
__jbd2_journal_abort_hard(journal);
if (errno) {
jbd2_journal_update_sb_errno(journal);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_REC_ERR;
write_unlock(&journal->j_state_lock);
}
jbd2_journal_update_sb_errno(journal);
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_REC_ERR;
write_unlock(&journal->j_state_lock);
}
/**
@@ -2139,11 +2145,6 @@ static void __journal_abort_soft (journal_t *journal, int errno)
* failure to disk. ext3_error, for example, now uses this
* functionality.
*
* Errors which originate from within the journaling layer will NOT
* supply an errno; a null errno implies that absolutely no further
* writes are done to the journal (unless there are any already in
* progress).
*
*/
void jbd2_journal_abort(journal_t *journal, int errno)

View File

@@ -1037,8 +1037,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
/* For undo access buffer must have data copied */
if (undo && !jh->b_committed_data)
goto out;
if (jh->b_transaction != handle->h_transaction &&
jh->b_next_transaction != handle->h_transaction)
if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
goto out;
/*
* There are two reasons for the barrier here:
@@ -1211,11 +1211,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
char *committed_data = NULL;
JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
JBUFFER_TRACE(jh, "entry");
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@@ -1326,15 +1327,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
if (!buffer_jbd(bh)) {
ret = -EUCLEAN;
goto out;
}
if (!buffer_jbd(bh))
return -EUCLEAN;
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
@@ -1353,6 +1356,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
jbd_lock_bh_state(bh);
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u "
"h_line_no=%u block_no=%llu jlist=%u\n",
handle->h_type, handle->h_line_no,
(unsigned long long) bh->b_blocknr,
jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
jbd_unlock_bh_state(bh);
@@ -1361,9 +1371,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
journal = transaction->t_journal;
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
jbd_lock_bh_state(bh);
if (jh->b_modified == 0) {
@@ -1372,11 +1379,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* of the transaction. This needs to be done
* once a transaction -bzzz
*/
jh->b_modified = 1;
if (handle->h_buffer_credits <= 0) {
ret = -ENOSPC;
goto out_unlock_bh;
}
jh->b_modified = 1;
handle->h_buffer_credits--;
}
@@ -1561,14 +1568,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
/* ... but we CAN drop it from the new transaction if we
* have also modified it since the original commit. */
/* ... but we CAN drop it from the new transaction through
* marking the buffer as freed and set j_next_transaction to
* the new transaction, so that not only the commit code
* knows it should clear dirty bits when it is done with the
* buffer, but also the buffer can be checkpointed only
* after the new transaction commits. */
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction == transaction);
set_buffer_freed(bh);
if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
jh->b_next_transaction = NULL;
jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
} else {
J_ASSERT(jh->b_next_transaction == transaction);
/*
* only drop a reference if this transaction modified
@@ -1882,6 +1896,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
J_ASSERT_JH(jh, jh->b_transaction != NULL);
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
jbd2_journal_put_journal_head(jh);
@@ -1973,6 +1990,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
{
struct buffer_head *head;
struct buffer_head *bh;
bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
@@ -1997,11 +2015,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
/*
* If we free a metadata buffer which has been failed to
* write out, the jbd2 checkpoint procedure will not detect
* this failure and may lead to filesystem inconsistency
* after cleanup journal tail.
*/
if (buffer_write_io_error(bh)) {
pr_err("JBD2: Error while async write back metadata bh %llu.",
(unsigned long long)bh->b_blocknr);
has_write_io_error = true;
}
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
if (has_write_io_error)
jbd2_journal_abort(journal, -EIO);
return ret;
}
@@ -2199,14 +2232,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
return -EBUSY;
}
/*
* OK, buffer won't be reachable after truncate. We just set
* j_next_transaction to the running transaction (if there is
* one) and mark buffer as freed so that commit code knows it
* should clear dirty bits when it is done with the buffer.
* OK, buffer won't be reachable after truncate. We just clear
* b_modified to not confuse transaction credit accounting, and
* set j_next_transaction to the running transaction (if there
* is one) and mark buffer as freed so that commit code knows
* it should clear dirty bits when it is done with the buffer.
*/
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
jh->b_modified = 0;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -2427,13 +2462,20 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
/*
* b_transaction must be set, otherwise the new b_transaction won't
* be holding jh reference
*/
J_ASSERT_JH(jh, jh->b_transaction != NULL);
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
* take a new one.
*/
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
WRITE_ONCE(jh->b_next_transaction, NULL);
if (buffer_freed(bh))
jlist = BJ_Forget;
else if (jh->b_modified)