forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
500 lines
14 KiB
500 lines
14 KiB
/* SPDX-License-Identifier: GPL-2.0-or-later */ |
|
/* |
|
* Copyright (C) International Business Machines Corp., 2000-2004 |
|
* Portions Copyright (C) Christoph Hellwig, 2001-2002 |
|
*/ |
|
#ifndef _H_JFS_LOGMGR |
|
#define _H_JFS_LOGMGR |
|
|
|
#include <linux/uuid.h> |
|
|
|
#include "jfs_filsys.h" |
|
#include "jfs_lock.h" |
|
|
|
/* |
|
* log manager configuration parameters |
|
*/ |
|
|
|
/*
 * log page size, in bytes, and its base-2 logarithm
 * (LOGPSIZE == 1 << L2LOGPSIZE; keep the two in step).
 */
#define	LOGPSIZE	4096
#define	L2LOGPSIZE	12

#define LOGPAGES	16	/* Log pages per mounted file system */
|
|
|
/* |
|
* log logical volume |
|
* |
|
* a log is used to make the commit operation on journalled |
|
* files within the same logical volume group atomic. |
|
* a log is implemented with a logical volume. |
|
* there is one log per logical volume group. |
|
* |
|
* block 0 of the log logical volume is not used (ipl etc). |
|
* block 1 contains a log "superblock" and is used by logFormat(), |
|
* lmLogInit(), lmLogShutdown(), and logRedo() to record status |
|
* of the log but is not otherwise used during normal processing. |
|
* blocks 2 - (N-1) are used to contain log records. |
|
* |
|
* when a volume group is varied-on-line, logRedo() must have |
|
* been executed before the file systems (logical volumes) in |
|
* the volume group can be mounted. |
|
*/ |
|
/* |
|
* log superblock (block 1 of logical volume) |
|
*/ |
|
#define LOGSUPER_B	1	/* block holding the log superblock */
#define	LOGSTART_B	2	/* first block holding log records */

#define	LOGMAGIC	0x87654321
#define	LOGVERSION	1

#define MAX_ACTIVE	128	/* Max active file systems sharing log */
|
|
|
struct logsuper { |
|
__le32 magic; /* 4: log lv identifier */ |
|
__le32 version; /* 4: version number */ |
|
__le32 serial; /* 4: log open/mount counter */ |
|
__le32 size; /* 4: size in number of LOGPSIZE blocks */ |
|
__le32 bsize; /* 4: logical block size in byte */ |
|
__le32 l2bsize; /* 4: log2 of bsize */ |
|
|
|
__le32 flag; /* 4: option */ |
|
__le32 state; /* 4: state - see below */ |
|
|
|
__le32 end; /* 4: addr of last log record set by logredo */ |
|
uuid_t uuid; /* 16: 128-bit journal uuid */ |
|
char label[16]; /* 16: journal label */ |
|
struct { |
|
uuid_t uuid; |
|
} active[MAX_ACTIVE]; /* 2048: active file systems list */ |
|
}; |
|
|
|
/* log flag: commit option (see jfs_filsys.h) */ |
|
|
|
/* log state (values stored in logsuper.state) */
#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
				 * log redo completed by logredo().
				 */
#define LOGWRAP		2	/* log wrapped */
#define LOGREADERR	3	/* log read error detected in logredo() */
|
|
|
|
|
/* |
|
* log logical page |
|
* |
|
* (this comment should be rewritten !) |
|
* the header and trailer structures (h,t) will normally have |
|
* the same page and eor value. |
|
* An exception to this occurs when a complete page write is not |
|
* accomplished on a power failure. Since the hardware may "split write" |
|
* sectors in the page, any out of order sequence may occur during powerfail |
|
* and needs to be recognized during log replay. The xor value is |
|
* an "exclusive or" of all log words in the page up to eor. This |
|
* 32 bit eor is stored with the top 16 bits in the header and the |
|
* bottom 16 bits in the trailer. logredo can easily recognize pages |
|
* that were not completed by reconstructing this eor and checking |
|
* the log page. |
|
* |
|
* Previous versions of the operating system did not allow split |
|
* writes and detected partially written records in logredo by |
|
* ordering the updates to the header, trailer, and the move of data |
|
* into the logdata area. The order: (1) data is moved (2) header |
|
* is updated (3) trailer is updated. In logredo, when the header |
|
* differed from the trailer, the header and trailer were reconciled |
|
* as follows: if h.page != t.page they were set to the smaller of |
|
* the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) |
|
* h.eor != t.eor they were set to the smaller of their two values. |
|
*/ |
|
struct logpage { |
|
struct { /* header */ |
|
__le32 page; /* 4: log sequence page number */ |
|
__le16 rsrvd; /* 2: */ |
|
__le16 eor; /* 2: end-of-log offset of lasrt record write */ |
|
} h; |
|
|
|
__le32 data[LOGPSIZE / 4 - 4]; /* log record area */ |
|
|
|
struct { /* trailer */ |
|
__le32 page; /* 4: normally the same as h.page */ |
|
__le16 rsrvd; /* 2: */ |
|
__le16 eor; /* 2: normally the same as h.eor */ |
|
} t; |
|
}; |
|
|
|
#define LOGPHDRSIZE 8 /* log page header size */ |
|
#define LOGPTLRSIZE 8 /* log page trailer size */ |
|
|
|
|
|
/* |
|
* log record |
|
* |
|
* (this comment should be rewritten !) |
|
* jfs uses only "after" log records (only a single writer is allowed |
|
* in a page, pages are written to temporary paging space if |
|
* they must be written to disk before commit, and i/o is |
|
* scheduled for modified pages to their home location after |
|
* the log records containing the after values and the commit |
|
* record is written to the log on disk, undo discards the copy |
|
* in main-memory.) |
|
* |
|
* a log record consists of a data area of variable length followed by |
|
* a descriptor of fixed size LOGRDSIZE bytes. |
|
* the data area is rounded up to an integral number of 4-bytes and |
|
* must be no longer than LOGPSIZE. |
|
* the descriptor is of size of multiple of 4-bytes and aligned on a |
|
* 4-byte boundary. |
|
* records are packed one after the other in the data area of log pages. |
|
* (sometimes a DUMMY record is inserted so that at least one record ends |
|
* on every page or the longest record is placed on at most two pages). |
|
* the field eor in page header/trailer points to the byte following |
|
* the last record on a page. |
|
*/ |
|
|
|
/* log record types (lrd.type) */
#define LOG_COMMIT		0x8000
#define LOG_SYNCPT		0x4000
#define LOG_MOUNT		0x2000
#define LOG_REDOPAGE		0x0800
#define LOG_NOREDOPAGE		0x0080
#define LOG_NOREDOINOEXT	0x0040
#define LOG_UPDATEMAP		0x0008
#define LOG_NOREDOFILE		0x0001

/* REDOPAGE/NOREDOPAGE log record data type */
#define	LOG_INODE		0x0001
#define	LOG_XTREE		0x0002
#define	LOG_DTREE		0x0004
#define	LOG_BTROOT		0x0010
#define	LOG_EA			0x0020
#define	LOG_ACL			0x0040
#define	LOG_DATA		0x0080
#define	LOG_NEW			0x0100
#define	LOG_EXTEND		0x0200
#define LOG_RELOCATE		0x0400
#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */

/* UPDATEMAP log record descriptor type */
#define	LOG_ALLOCXADLIST	0x0080
#define	LOG_ALLOCPXDLIST	0x0040
#define	LOG_ALLOCXAD		0x0020
#define	LOG_ALLOCPXD		0x0010
#define	LOG_FREEXADLIST		0x0008
#define	LOG_FREEPXDLIST		0x0004
#define	LOG_FREEXAD		0x0002
#define	LOG_FREEPXD		0x0001
|
|
|
|
|
struct lrd { |
|
/* |
|
* type independent area |
|
*/ |
|
__le32 logtid; /* 4: log transaction identifier */ |
|
__le32 backchain; /* 4: ptr to prev record of same transaction */ |
|
__le16 type; /* 2: record type */ |
|
__le16 length; /* 2: length of data in record (in byte) */ |
|
__le32 aggregate; /* 4: file system lv/aggregate */ |
|
/* (16) */ |
|
|
|
/* |
|
* type dependent area (20) |
|
*/ |
|
union { |
|
|
|
/* |
|
* COMMIT: commit |
|
* |
|
* transaction commit: no type-dependent information; |
|
*/ |
|
|
|
/* |
|
* REDOPAGE: after-image |
|
* |
|
* apply after-image; |
|
* |
|
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 inode; /* 4: inode number */ |
|
__le16 type; /* 2: REDOPAGE record type */ |
|
__le16 l2linesize; /* 2: log2 of line size */ |
|
pxd_t pxd; /* 8: on-disk page pxd */ |
|
} redopage; /* (20) */ |
|
|
|
/* |
|
* NOREDOPAGE: the page is freed |
|
* |
|
* do not apply after-image records which precede this record |
|
* in the log with the same page block number to this page. |
|
* |
|
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 inode; /* 4: inode number */ |
|
__le16 type; /* 2: NOREDOPAGE record type */ |
|
__le16 rsrvd; /* 2: reserved */ |
|
pxd_t pxd; /* 8: on-disk page pxd */ |
|
} noredopage; /* (20) */ |
|
|
|
/* |
|
* UPDATEMAP: update block allocation map |
|
* |
|
* either in-line PXD, |
|
* or out-of-line XADLIST; |
|
* |
|
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format; |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 inode; /* 4: inode number */ |
|
__le16 type; /* 2: UPDATEMAP record type */ |
|
__le16 nxd; /* 2: number of extents */ |
|
pxd_t pxd; /* 8: pxd */ |
|
} updatemap; /* (20) */ |
|
|
|
/* |
|
* NOREDOINOEXT: the inode extent is freed |
|
* |
|
* do not apply after-image records which precede this |
|
* record in the log with the any of the 4 page block |
|
* numbers in this inode extent. |
|
* |
|
* NOTE: The fileset and pxd fields MUST remain in |
|
* the same fields in the REDOPAGE record format. |
|
* |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 iagnum; /* 4: IAG number */ |
|
__le32 inoext_idx; /* 4: inode extent index */ |
|
pxd_t pxd; /* 8: on-disk page pxd */ |
|
} noredoinoext; /* (20) */ |
|
|
|
/* |
|
* SYNCPT: log sync point |
|
* |
|
* replay log up to syncpt address specified; |
|
*/ |
|
struct { |
|
__le32 sync; /* 4: syncpt address (0 = here) */ |
|
} syncpt; |
|
|
|
/* |
|
* MOUNT: file system mount |
|
* |
|
* file system mount: no type-dependent information; |
|
*/ |
|
|
|
/* |
|
* ? FREEXTENT: free specified extent(s) |
|
* |
|
* free specified extent(s) from block allocation map |
|
* N.B.: nextents should be length of data/sizeof(xad_t) |
|
*/ |
|
struct { |
|
__le32 type; /* 4: FREEXTENT record type */ |
|
__le32 nextent; /* 4: number of extents */ |
|
|
|
/* data: PXD or XAD list */ |
|
} freextent; |
|
|
|
/* |
|
* ? NOREDOFILE: this file is freed |
|
* |
|
* do not apply records which precede this record in the log |
|
* with the same inode number. |
|
* |
|
* NOREDOFILE must be the first to be written at commit |
|
* (last to be read in logredo()) - it prevents |
|
* replay of preceding updates of all preceding generations |
|
* of the inumber esp. the on-disk inode itself. |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 inode; /* 4: inode number */ |
|
} noredofile; |
|
|
|
/* |
|
* ? NEWPAGE: |
|
* |
|
* metadata type dependent |
|
*/ |
|
struct { |
|
__le32 fileset; /* 4: fileset number */ |
|
__le32 inode; /* 4: inode number */ |
|
__le32 type; /* 4: NEWPAGE record type */ |
|
pxd_t pxd; /* 8: on-disk page pxd */ |
|
} newpage; |
|
|
|
/* |
|
* ? DUMMY: filler |
|
* |
|
* no type-dependent information |
|
*/ |
|
} log; |
|
}; /* (36) */ |
|
|
|
#define LOGRDSIZE (sizeof(struct lrd)) |
|
|
|
/* |
|
* line vector descriptor |
|
*/ |
|
struct lvd { |
|
__le16 offset; |
|
__le16 length; |
|
}; |
|
|
|
|
|
/* |
|
* log logical volume |
|
*/ |
|
struct jfs_log { |
|
|
|
struct list_head sb_list;/* This is used to sync metadata |
|
* before writing syncpt. |
|
*/ |
|
struct list_head journal_list; /* Global list */ |
|
struct block_device *bdev; /* 4: log lv pointer */ |
|
int serial; /* 4: log mount serial number */ |
|
|
|
s64 base; /* @8: log extent address (inline log ) */ |
|
int size; /* 4: log size in log page (in page) */ |
|
int l2bsize; /* 4: log2 of bsize */ |
|
|
|
unsigned long flag; /* 4: flag */ |
|
|
|
struct lbuf *lbuf_free; /* 4: free lbufs */ |
|
wait_queue_head_t free_wait; /* 4: */ |
|
|
|
/* log write */ |
|
int logtid; /* 4: log tid */ |
|
int page; /* 4: page number of eol page */ |
|
int eor; /* 4: eor of last record in eol page */ |
|
struct lbuf *bp; /* 4: current log page buffer */ |
|
|
|
struct mutex loglock; /* 4: log write serialization lock */ |
|
|
|
/* syncpt */ |
|
int nextsync; /* 4: bytes to write before next syncpt */ |
|
int active; /* 4: */ |
|
wait_queue_head_t syncwait; /* 4: */ |
|
|
|
/* commit */ |
|
uint cflag; /* 4: */ |
|
struct list_head cqueue; /* FIFO commit queue */ |
|
struct tblock *flush_tblk; /* tblk we're waiting on for flush */ |
|
int gcrtc; /* 4: GC_READY transaction count */ |
|
struct tblock *gclrt; /* 4: latest GC_READY transaction */ |
|
spinlock_t gclock; /* 4: group commit lock */ |
|
int logsize; /* 4: log data area size in byte */ |
|
int lsn; /* 4: end-of-log */ |
|
int clsn; /* 4: clsn */ |
|
int syncpt; /* 4: addr of last syncpt record */ |
|
int sync; /* 4: addr from last logsync() */ |
|
struct list_head synclist; /* 8: logsynclist anchor */ |
|
spinlock_t synclock; /* 4: synclist lock */ |
|
struct lbuf *wqueue; /* 4: log pageout queue */ |
|
int count; /* 4: count */ |
|
uuid_t uuid; /* 16: 128-bit uuid of log device */ |
|
|
|
int no_integrity; /* 3: flag to disable journaling to disk */ |
|
}; |
|
|
|
/*
 * Log flag
 *
 * The values are consecutive small integers rather than bit masks.
 * NOTE(review): presumably bit numbers within jfs_log.flag - confirm
 * against the users of these constants.
 */
#define log_INLINELOG	1
#define log_SYNCBARRIER	2
#define log_QUIESCE	3
#define log_FLUSH	4
|
|
|
/*
 * group commit flag
 */
/* jfs_log */
#define logGC_PAGEOUT	0x00000001

/* tblock/lbuf */
#define tblkGC_QUEUE		0x0001
#define tblkGC_READY		0x0002
#define tblkGC_COMMIT		0x0004
#define tblkGC_COMMITTED	0x0008
#define tblkGC_EOP		0x0010
#define tblkGC_FREE		0x0020
#define tblkGC_LEADER		0x0040
#define tblkGC_ERROR		0x0080
#define tblkGC_LAZY		0x0100	/* D230860 */
#define tblkGC_UNLOCKED		0x0200	/* D230860 */
|
|
|
/* |
|
* log cache buffer header |
|
*/ |
|
struct lbuf { |
|
struct jfs_log *l_log; /* 4: log associated with buffer */ |
|
|
|
/* |
|
* data buffer base area |
|
*/ |
|
uint l_flag; /* 4: pageout control flags */ |
|
|
|
struct lbuf *l_wqnext; /* 4: write queue link */ |
|
struct lbuf *l_freelist; /* 4: freelistlink */ |
|
|
|
int l_pn; /* 4: log page number */ |
|
int l_eor; /* 4: log record eor */ |
|
int l_ceor; /* 4: committed log record eor */ |
|
|
|
s64 l_blkno; /* 8: log page block number */ |
|
caddr_t l_ldata; /* 4: data page */ |
|
struct page *l_page; /* The page itself */ |
|
uint l_offset; /* Offset of l_ldata within the page */ |
|
|
|
wait_queue_head_t l_ioevent; /* 4: i/o done event */ |
|
}; |
|
|
|
/* Reuse l_freelist for redrive list */ |
|
#define l_redrive_next l_freelist |
|
|
|
/* |
|
* logsynclist block |
|
* |
|
* common logsyncblk prefix for jbuf_t and tblock |
|
*/ |
|
struct logsyncblk { |
|
u16 xflag; /* flags */ |
|
u16 flag; /* only meaninful in tblock */ |
|
lid_t lid; /* lock id */ |
|
s32 lsn; /* log sequence number */ |
|
struct list_head synclist; /* log sync list link */ |
|
}; |
|
|
|
/*
 *	logsynclist serialization (per log)
 *
 * Thin wrappers around the spinlock calls on (log)->synclock.  'flags'
 * is handed straight to spin_lock_irqsave()/spin_unlock_irqrestore().
 */
#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
	spin_unlock_irqrestore(&(log)->synclock, flags)
|
|
|
/*
 * compute the difference in bytes of lsn from sync point
 *
 * diff: signed integer lvalue that receives the result
 * lsn:  log sequence number to measure
 * log:  pointer to an object with 'syncpt' and 'logsize' members
 *
 * Stores lsn - log->syncpt in diff; when that is negative, log->logsize
 * is added once so the result accounts for wrap-around.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe in an unbraced if/else; invoke it with a trailing semicolon.
 */
#define logdiff(diff, lsn, log)					\
do {								\
	(diff) = (lsn) - (log)->syncpt;				\
	if ((diff) < 0)						\
		(diff) += (log)->logsize;			\
} while (0)
|
|
|
extern int lmLogOpen(struct super_block *sb); |
|
extern int lmLogClose(struct super_block *sb); |
|
extern int lmLogShutdown(struct jfs_log * log); |
|
extern int lmLogInit(struct jfs_log * log); |
|
extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); |
|
extern int lmGroupCommit(struct jfs_log *, struct tblock *); |
|
extern int jfsIOWait(void *); |
|
extern void jfs_flush_journal(struct jfs_log * log, int wait); |
|
extern void jfs_syncpt(struct jfs_log *log, int hard_sync); |
|
|
|
#endif /* _H_JFS_LOGMGR */
|
|
|