forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
6.3 KiB
218 lines
6.3 KiB
/* SPDX-License-Identifier: GPL-2.0 */ |
|
#ifndef _RAID1_H |
|
#define _RAID1_H |
|
|
|
/*
 * Each barrier unit size is 64MB for now.
 * Note: it must be larger than RESYNC_DEPTH.
 *
 * BARRIER_UNIT_SECTOR_SIZE is derived from BARRIER_UNIT_SECTOR_BITS so the
 * two values cannot drift apart.
 */
#define BARRIER_UNIT_SECTOR_BITS	17
#define BARRIER_UNIT_SECTOR_SIZE	(1 << BARRIER_UNIT_SECTOR_BITS)
|
/*
 * In struct r1conf, the following members are related to I/O barrier
 * buckets,
 *	atomic_t	*nr_pending;
 *	atomic_t	*nr_waiting;
 *	atomic_t	*nr_queued;
 *	atomic_t	*barrier;
 * Each of them points to an array of atomic_t variables, each array is
 * designed to have BARRIER_BUCKETS_NR elements and occupy a single
 * memory page. The data width of atomic_t variables is 4 bytes, equal
 * to 1<<(ilog2(sizeof(atomic_t))), BARRIER_BUCKETS_NR_BITS is defined
 * as (PAGE_SHIFT - ilog2(sizeof(atomic_t))) to make sure an array of
 * atomic_t variables with BARRIER_BUCKETS_NR elements exactly
 * occupies a single memory page.
 */
#define BARRIER_BUCKETS_NR_BITS		(PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR		(1 << BARRIER_BUCKETS_NR_BITS)
|
|
|
/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk. |
|
* There are three safe ways to access raid1_info.rdev. |
|
* 1/ when holding mddev->reconfig_mutex |
|
* 2/ when resync/recovery is known to be happening - i.e. in code that is |
|
* called as part of performing resync/recovery. |
|
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer |
|
* and if it is non-NULL, increment rdev->nr_pending before dropping the |
|
* RCU lock. |
|
* When .rdev is set to NULL, the nr_pending count checked again and if it has |
|
* been incremented, the pointer is put back in .rdev. |
|
*/ |
|
|
|
struct raid1_info { |
|
struct md_rdev *rdev; |
|
sector_t head_position; |
|
|
|
/* When choose the best device for a read (read_balance()) |
|
* we try to keep sequential reads one the same device |
|
*/ |
|
sector_t next_seq_sect; |
|
sector_t seq_start; |
|
}; |
|
|
|
/*
 * Memory pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used, as it can change while a pool is active.
 * These two datums are stored in a kmalloced struct.
 * The 'raid_disks' here is twice the raid_disks in r1conf.
 * This allows space for each 'real' device to have a replacement in the
 * second half of the array.
 */
struct pool_info {
	struct mddev	*mddev;
	int		raid_disks;
};
|
|
|
struct r1conf { |
|
struct mddev *mddev; |
|
struct raid1_info *mirrors; /* twice 'raid_disks' to |
|
* allow for replacements. |
|
*/ |
|
int raid_disks; |
|
|
|
spinlock_t device_lock; |
|
|
|
/* list of 'struct r1bio' that need to be processed by raid1d, |
|
* whether to retry a read, writeout a resync or recovery |
|
* block, or anything else. |
|
*/ |
|
struct list_head retry_list; |
|
/* A separate list of r1bio which just need raid_end_bio_io called. |
|
* This mustn't happen for writes which had any errors if the superblock |
|
* needs to be written. |
|
*/ |
|
struct list_head bio_end_io_list; |
|
|
|
/* queue pending writes to be submitted on unplug */ |
|
struct bio_list pending_bio_list; |
|
int pending_count; |
|
|
|
/* for use when syncing mirrors: |
|
* We don't allow both normal IO and resync/recovery IO at |
|
* the same time - resync/recovery can only happen when there |
|
* is no other IO. So when either is active, the other has to wait. |
|
* See more details description in raid1.c near raise_barrier(). |
|
*/ |
|
wait_queue_head_t wait_barrier; |
|
spinlock_t resync_lock; |
|
atomic_t nr_sync_pending; |
|
atomic_t *nr_pending; |
|
atomic_t *nr_waiting; |
|
atomic_t *nr_queued; |
|
atomic_t *barrier; |
|
int array_frozen; |
|
|
|
/* Set to 1 if a full sync is needed, (fresh device added). |
|
* Cleared when a sync completes. |
|
*/ |
|
int fullsync; |
|
|
|
/* When the same as mddev->recovery_disabled we don't allow |
|
* recovery to be attempted as we expect a read error. |
|
*/ |
|
int recovery_disabled; |
|
|
|
/* poolinfo contains information about the content of the |
|
* mempools - it changes when the array grows or shrinks |
|
*/ |
|
struct pool_info *poolinfo; |
|
mempool_t r1bio_pool; |
|
mempool_t r1buf_pool; |
|
|
|
struct bio_set bio_split; |
|
|
|
/* temporary buffer to synchronous IO when attempting to repair |
|
* a read error. |
|
*/ |
|
struct page *tmppage; |
|
|
|
/* When taking over an array from a different personality, we store |
|
* the new thread here until we fully activate the array. |
|
*/ |
|
struct md_thread *thread; |
|
|
|
/* Keep track of cluster resync window to send to other |
|
* nodes. |
|
*/ |
|
sector_t cluster_sync_low; |
|
sector_t cluster_sync_high; |
|
|
|
}; |
|
|
|
/* |
|
* this is our 'private' RAID1 bio. |
|
* |
|
* it contains information about what kind of IO operations were started |
|
* for this RAID1 operation, and about their status: |
|
*/ |
|
|
|
struct r1bio { |
|
atomic_t remaining; /* 'have we finished' count, |
|
* used from IRQ handlers |
|
*/ |
|
atomic_t behind_remaining; /* number of write-behind ios remaining |
|
* in this BehindIO request |
|
*/ |
|
sector_t sector; |
|
int sectors; |
|
unsigned long state; |
|
struct mddev *mddev; |
|
/* |
|
* original bio going to /dev/mdx |
|
*/ |
|
struct bio *master_bio; |
|
/* |
|
* if the IO is in READ direction, then this is where we read |
|
*/ |
|
int read_disk; |
|
|
|
struct list_head retry_list; |
|
|
|
/* |
|
* When R1BIO_BehindIO is set, we store pages for write behind |
|
* in behind_master_bio. |
|
*/ |
|
struct bio *behind_master_bio; |
|
|
|
/* |
|
* if the IO is in WRITE direction, then multiple bios are used. |
|
* We choose the number when they are allocated. |
|
*/ |
|
struct bio *bios[]; |
|
/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ |
|
}; |
|
|
|
/*
 * Bits for r1bio.state.  The enumerators are implicitly numbered from 0
 * in declaration order, so their order must not be changed.
 */
enum r1bio_state {
	R1BIO_Uptodate,
	R1BIO_IsSync,
	R1BIO_Degraded,
	R1BIO_BehindIO,
	/*
	 * Set ReadError on bios that experience a read error so that
	 * raid1d knows what to do with them.
	 */
	R1BIO_ReadError,
	/*
	 * For write-behind requests, we call bi_end_io when
	 * the last non-write-behind device completes, providing
	 * any write was successful.  Otherwise we call when
	 * any write-behind write succeeds, otherwise we call
	 * with failure when last write completes (and all failed).
	 * Record that bi_end_io was called with this flag...
	 */
	R1BIO_Returned,
	/*
	 * If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R1BIO_MadeGood,
	R1BIO_WriteError,
	R1BIO_FailFast,
};
|
|
|
/*
 * Map a sector to an index derived from its barrier unit.
 *
 * The sector is first reduced to its barrier unit number by dropping the
 * low BARRIER_UNIT_SECTOR_BITS bits, then that number is passed through
 * hash_long() with BARRIER_BUCKETS_NR_BITS as the bit count.
 * NOTE(review): hash_long() is defined elsewhere; presumably the result is
 * in [0, BARRIER_BUCKETS_NR) and is used to index the barrier bucket
 * arrays - confirm.
 */
static inline int sector_to_idx(sector_t sector)
{
	return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
			 BARRIER_BUCKETS_NR_BITS);
}
|
#endif
|
|
|