xfs_log.c 105 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
/*
2
3
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
Linus Torvalds's avatar
Linus Torvalds committed
4
 *
5
6
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
Linus Torvalds's avatar
Linus Torvalds committed
7
8
 * published by the Free Software Foundation.
 *
9
10
11
12
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
Linus Torvalds's avatar
Linus Torvalds committed
13
 *
14
15
16
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
Linus Torvalds's avatar
Linus Torvalds committed
17
18
 */
#include "xfs.h"
19
#include "xfs_fs.h"
Linus Torvalds's avatar
Linus Torvalds committed
20
#include "xfs_types.h"
21
#include "xfs_bit.h"
Linus Torvalds's avatar
Linus Torvalds committed
22
#include "xfs_log.h"
23
#include "xfs_inum.h"
Linus Torvalds's avatar
Linus Torvalds committed
24
#include "xfs_trans.h"
25
26
27
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
Linus Torvalds's avatar
Linus Torvalds committed
28
29
30
31
32
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
33
#include "xfs_bmap_btree.h"
Linus Torvalds's avatar
Linus Torvalds committed
34
#include "xfs_alloc_btree.h"
35
#include "xfs_ialloc_btree.h"
Linus Torvalds's avatar
Linus Torvalds committed
36
37
#include "xfs_log_recover.h"
#include "xfs_trans_priv.h"
38
39
40
41
42
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_rw.h"
Linus Torvalds's avatar
Linus Torvalds committed
43

44
kmem_zone_t	*xfs_log_ticket_zone;
Linus Torvalds's avatar
Linus Torvalds committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

/*
 * Advance a write cursor by @bytes: the destination pointer and the running
 * offset move forward, the remaining length shrinks.  @ptr, @len and @off
 * are modified in place; @bytes is expanded three times, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * a semicolon is exactly one statement and stays correct inside an unbraced
 * if/else.
 */
#define xlog_write_adv_cnt(ptr, len, off, bytes) \
	do { \
		(ptr) += (bytes); \
		(len) -= (bytes); \
		(off) += (bytes); \
	} while (0)

/* Local miscellaneous function prototypes */
STATIC int	 xlog_bdstrat_cb(struct xfs_buf *);
STATIC int	 xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
				    xlog_in_core_t **, xfs_lsn_t *);
STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,
				xfs_buftarg_t	*log_target,
				xfs_daddr_t	blk_offset,
				int		num_bblks);
STATIC int	 xlog_space_left(xlog_t *log, int cycle, int bytes);
STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
61
STATIC void	 xlog_dealloc_log(xlog_t *log);
Linus Torvalds's avatar
Linus Torvalds committed
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
STATIC int	 xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
			    int nentries, xfs_log_ticket_t tic,
			    xfs_lsn_t *start_lsn,
			    xlog_in_core_t **commit_iclog,
			    uint flags);

/* local state machine functions */
STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog);
STATIC int  xlog_state_get_iclog_space(xlog_t		*log,
				       int		len,
				       xlog_in_core_t	**iclog,
				       xlog_ticket_t	*ticket,
				       int		*continued_write,
				       int		*logoffsetp);
STATIC int  xlog_state_release_iclog(xlog_t		*log,
				     xlog_in_core_t	*iclog);
STATIC void xlog_state_switch_iclogs(xlog_t		*log,
				     xlog_in_core_t *iclog,
				     int		eventual_size);
82
83
84
85
86
STATIC int  xlog_state_sync(xlog_t			*log,
			    xfs_lsn_t 			lsn,
			    uint			flags,
			    int				*log_flushed);
STATIC int  xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
Linus Torvalds's avatar
Linus Torvalds committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog);

/* local functions to manipulate grant head */
STATIC int  xlog_grant_log_space(xlog_t		*log,
				 xlog_ticket_t	*xtic);
STATIC void xlog_grant_push_ail(xfs_mount_t	*mp,
				int		need_bytes);
STATIC void xlog_regrant_reserve_log_space(xlog_t	 *log,
					   xlog_ticket_t *ticket);
STATIC int xlog_regrant_write_log_space(xlog_t		*log,
					 xlog_ticket_t  *ticket);
STATIC void xlog_ungrant_log_space(xlog_t	 *log,
				   xlog_ticket_t *ticket);


/* local ticket functions */
STATIC xlog_ticket_t	*xlog_ticket_get(xlog_t *log,
					 int	unit_bytes,
					 int	count,
					 char	clientid,
					 uint	flags);
STATIC void		xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket);

110
#if defined(DEBUG)
Linus Torvalds's avatar
Linus Torvalds committed
111
112
113
114
115
116
117
118
119
120
121
122
123
STATIC void	xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
STATIC void	xlog_verify_grant_head(xlog_t *log, int equals);
STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
				  int count, boolean_t syncing);
STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
				     xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_head(a,b)
#define xlog_verify_iclog(a,b,c,d)
#define xlog_verify_tail_lsn(a,b,c)
#endif

124
STATIC int	xlog_iclogs_empty(xlog_t *log);
Linus Torvalds's avatar
Linus Torvalds committed
125
126

#if defined(XFS_LOG_TRACE)
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

#define XLOG_TRACE_LOGGRANT_SIZE	2048
#define XLOG_TRACE_ICLOG_SIZE		256

/*
 * Allocate the grant trace buffer of @log, sized by
 * XLOG_TRACE_LOGGRANT_SIZE, and store it in log->l_grant_trace.
 */
void
xlog_trace_loggrant_alloc(
	xlog_t		*log)
{
	log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
}

/* Free the grant trace buffer held in log->l_grant_trace. */
void
xlog_trace_loggrant_dealloc(
	xlog_t		*log)
{
	ktrace_free(log->l_grant_trace);
}

Linus Torvalds's avatar
Linus Torvalds committed
143
144
145
/*
 * Record one entry in the grant trace buffer of @log.
 *
 * The entry captures the ticket pointer, both grant queue heads, the
 * reserve/write grant heads (cycle and bytes), the current cycle/block, the
 * tail LSN split into cycle and block, the caller supplied @string, and the
 * ticket's transaction type, packed counts and reservation sizes.
 */
void
xlog_trace_loggrant(
	xlog_t		*log,
	xlog_ticket_t	*tic,
	xfs_caddr_t	string)
{
	unsigned long	ocnt = (unsigned long)tic->t_ocnt;
	unsigned long	cnt = (unsigned long)tic->t_cnt;
	unsigned long	cnts;

	/* ticket counts are 1 byte each: t_ocnt in bits 0-7, t_cnt above it */
	cnts = ocnt | (cnt << 8);

	ktrace_enter(log->l_grant_trace,
		     (void *)tic,
		     (void *)log->l_reserve_headq,
		     (void *)log->l_write_headq,
		     (void *)((unsigned long)log->l_grant_reserve_cycle),
		     (void *)((unsigned long)log->l_grant_reserve_bytes),
		     (void *)((unsigned long)log->l_grant_write_cycle),
		     (void *)((unsigned long)log->l_grant_write_bytes),
		     (void *)((unsigned long)log->l_curr_cycle),
		     (void *)((unsigned long)log->l_curr_block),
		     (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
		     (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
		     (void *)string,
		     (void *)((unsigned long)tic->t_trans_type),
		     (void *)cnts,
		     (void *)((unsigned long)tic->t_curr_res),
		     (void *)((unsigned long)tic->t_unit_res));
}

170
171
172
173
174
175
176
177
178
179
180
181
/*
 * Allocate the per-iclog trace buffer, sized by XLOG_TRACE_ICLOG_SIZE, and
 * store it in iclog->ic_trace.
 */
void
xlog_trace_iclog_alloc(
	xlog_in_core_t	*iclog)
{
	iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
}

/* Free the trace buffer held in iclog->ic_trace. */
void
xlog_trace_iclog_dealloc(
	xlog_in_core_t	*iclog)
{
	ktrace_free(iclog->ic_trace);
}

Linus Torvalds's avatar
Linus Torvalds committed
182
183
184
185
186
/*
 * Record one entry in the trace buffer of @iclog.  Only the first two
 * slots carry data (@state and the current pid); the remaining fourteen
 * slots are passed as NULL.
 */
void
xlog_trace_iclog(
	xlog_in_core_t	*iclog,
	uint		state)
{
	void		*state_slot = (void *)((unsigned long)state);
	void		*pid_slot = (void *)((unsigned long)current_pid());

	ktrace_enter(iclog->ic_trace,
		     state_slot,
		     pid_slot,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL,
		     (void *)NULL, (void *)NULL);
}
#else
194
195
196

#define	xlog_trace_loggrant_alloc(log)
#define	xlog_trace_loggrant_dealloc(log)
Linus Torvalds's avatar
Linus Torvalds committed
197
#define	xlog_trace_loggrant(log,tic,string)
198
199
200

#define	xlog_trace_iclog_alloc(iclog)
#define	xlog_trace_iclog_dealloc(iclog)
Linus Torvalds's avatar
Linus Torvalds committed
201
#define	xlog_trace_iclog(iclog,state)
202

Linus Torvalds's avatar
Linus Torvalds committed
203
204
#endif /* XFS_LOG_TRACE */

205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256

/*
 * Append @tic to the circular, doubly linked ticket queue whose head
 * pointer is *@qp, and mark the ticket as queued.
 *
 * An empty queue (*@qp == NULL) becomes a one element ring with @tic as
 * its head; otherwise @tic is linked in just before the head, i.e. at the
 * tail, and the head is left unchanged.
 */
static void
xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
{
	struct xlog_ticket	*head = *qp;

	if (!head) {
		/* first ticket: it is its own neighbour in both directions */
		tic->t_next = tic;
		tic->t_prev = tic;
		*qp = tic;
	} else {
		struct xlog_ticket	*tail = head->t_prev;

		tic->t_next = head;
		tic->t_prev = tail;
		tail->t_next = tic;
		head->t_prev = tic;
	}

	tic->t_flags |= XLOG_TIC_IN_Q;
}

/*
 * Unlink @tic from the circular ticket queue whose head pointer is *@qp
 * and clear its queued flag.
 *
 * If @tic is the only element the queue becomes empty.  Otherwise the head
 * pointer is set to the ticket following @tic, whether or not @tic was the
 * head.  The ticket's own links are reset to NULL.
 */
static void
xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
{
	struct xlog_ticket	*next = tic->t_next;

	if (next == tic) {
		*qp = NULL;
	} else {
		struct xlog_ticket	*prev = tic->t_prev;

		*qp = next;
		next->t_prev = prev;
		prev->t_next = next;
	}

	tic->t_next = NULL;
	tic->t_prev = NULL;
	tic->t_flags &= ~XLOG_TIC_IN_Q;
}

/*
 * Move both grant heads (write and reserve) backwards by @bytes.
 *
 * Each head is a (cycle, bytes) pair; when the byte offset goes negative
 * it is wrapped by adding the log size and the cycle is decremented.
 */
static void
xlog_grant_sub_space(struct log *log, int bytes)
{
	/* write grant head */
	log->l_grant_write_bytes -= bytes;
	if (log->l_grant_write_bytes < 0) {
		log->l_grant_write_cycle--;
		log->l_grant_write_bytes += log->l_logsize;
	}

	/* reserve grant head */
	log->l_grant_reserve_bytes -= bytes;
	if (log->l_grant_reserve_bytes < 0) {
		log->l_grant_reserve_cycle--;
		log->l_grant_reserve_bytes += log->l_logsize;
	}
}

/*
 * Advance the write grant head by @bytes.
 *
 * If the advance reaches or passes the end of the log, the cycle is bumped
 * and the byte offset restarts at the amount that spilled over.
 */
static void
xlog_grant_add_space_write(struct log *log, int bytes)
{
	int	room = log->l_logsize - log->l_grant_write_bytes;

	if (bytes >= room) {
		/* wrap into the next cycle */
		log->l_grant_write_cycle++;
		log->l_grant_write_bytes = bytes - room;
		return;
	}

	log->l_grant_write_bytes += bytes;
}

/*
 * Advance the reserve grant head by @bytes.
 *
 * If the advance reaches or passes the end of the log, the cycle is bumped
 * and the byte offset restarts at the amount that spilled over.
 */
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
	int	room = log->l_logsize - log->l_grant_reserve_bytes;

	if (bytes >= room) {
		/* wrap into the next cycle */
		log->l_grant_reserve_cycle++;
		log->l_grant_reserve_bytes = bytes - room;
		return;
	}

	log->l_grant_reserve_bytes += bytes;
}

/*
 * Advance both grant heads by @bytes: the write head first, then the
 * reserve head.
 */
static inline void
xlog_grant_add_space(
	struct log	*log,
	int		bytes)
{
	xlog_grant_add_space_write(log, bytes);
	xlog_grant_add_space_reserve(log, bytes);
}

285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
/*
 * Clear the per-ticket region accounting: the number of recorded regions,
 * their running length sum, and the op-header count.  The overflow
 * accumulator (t_res_o_flow) is not touched here.
 */
static void
xlog_tic_reset_res(
	xlog_ticket_t	*tic)
{
	tic->t_res_num_ophdrs = 0;
	tic->t_res_arr_sum = 0;
	tic->t_res_num = 0;
}

/*
 * Record a region of @len bytes and kind @type in the ticket's region
 * array.
 *
 * The array holds XLOG_TIC_LEN_MAX entries.  When it is full, the running
 * sum is folded into t_res_o_flow and recording restarts at slot 0, so
 * earlier entries are overwritten while their total is preserved.
 */
static void
xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
{
	if (tic->t_res_num == XLOG_TIC_LEN_MAX) {
		/* array full: bank the sum and start over */
		tic->t_res_o_flow += tic->t_res_arr_sum;
		tic->t_res_arr_sum = 0;
		tic->t_res_num = 0;
	}

	tic->t_res_arr_sum += len;
	tic->t_res_arr[tic->t_res_num].r_type = type;
	tic->t_res_arr[tic->t_res_num].r_len = len;
	tic->t_res_num++;
}
308

Linus Torvalds's avatar
Linus Torvalds committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
/*
 * NOTES:
 *
 *	1. currblock field gets updated at startup and after in-core logs
 *		are marked with WANT_SYNC.
 */

/*
 * Called when the user of a log ticket is finished with its reservation.
 *
 * Unless the log has been shut down, a commit record is written for the
 * ticket when XLOG_TIC_INITED is clear in its flags, i.e. when the ticket
 * has actually been written with.  If the log is shut down, or writing the
 * commit record fails, the returned LSN is -1 and a permanent reservation
 * is released as if XFS_LOG_REL_PERM_RESERV had been passed.
 *
 * Tickets that are not permanent reservations, and permanent ones the
 * caller (or the failure path) asked to release, have their space
 * ungranted and are put back.  A permanent reservation that is kept is
 * regranted instead and XLOG_TIC_INITED is set again so that, per the
 * original design notes, the next write emits a start record.
 *
 * Returns the LSN filled in by xlog_commit_record(), 0 if no commit record
 * was written, or (xfs_lsn_t)-1 on shutdown/error.
 */
xfs_lsn_t
xfs_log_done(
	xfs_mount_t		*mp,
	xfs_log_ticket_t	xtic,
	void			**iclog,
	uint			flags)
{
	xlog_t		*log = mp->m_log;
	xlog_ticket_t	*ticket = xtic;
	xfs_lsn_t	lsn = 0;
	int		failed = 0;
	int		release;

	if (XLOG_FORCED_SHUTDOWN(log)) {
		failed = 1;
	} else if (!(ticket->t_flags & XLOG_TIC_INITED)) {
		/*
		 * Something was written with this ticket, so a commit record
		 * is needed.  On error just carry on and give the ticket back.
		 */
		if (xlog_commit_record(mp, ticket,
				       (xlog_in_core_t **)iclog, &lsn))
			failed = 1;
	}

	if (failed) {
		lsn = (xfs_lsn_t) -1;
		if (ticket->t_flags & XLOG_TIC_PERM_RESERV)
			flags |= XFS_LOG_REL_PERM_RESERV;
	}

	release = !(ticket->t_flags & XLOG_TIC_PERM_RESERV) ||
		  (flags & XFS_LOG_REL_PERM_RESERV);

	if (!release) {
		xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
		xlog_regrant_reserve_log_space(log, ticket);
		/*
		 * Permanent reservation that is being kept: put the ticket
		 * back into the inited state for its next use.
		 */
		ticket->t_flags |= XLOG_TIC_INITED;
		return lsn;
	}

	/*
	 * Not a permanent reservation, or an explicit request to release
	 * a permanent one.
	 */
	xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
	xlog_ungrant_log_space(log, ticket);
	xlog_ticket_put(log, ticket);

	return lsn;
}	/* xfs_log_done */


/*
 * Force the in-core log to disk.  If flags == XFS_LOG_SYNC,
 *	the force is done synchronously.
 *
 * Asynchronous forces are implemented by setting the WANT_SYNC
 * bit in the appropriate in-core log and then returning.
 *
385
386
 * Synchronous forces are implemented with a signal variable. All callers
 * to force a given lsn to disk will wait on a the sv attached to the
Linus Torvalds's avatar
Linus Torvalds committed
387
388
 * specific in-core log.  When given in-core log finally completes its
 * write to disk, that thread will wake up all threads waiting on the
389
 * sv.
Linus Torvalds's avatar
Linus Torvalds committed
390
391
 */
int
392
393
394
395
396
_xfs_log_force(
	xfs_mount_t	*mp,
	xfs_lsn_t	lsn,
	uint		flags,
	int		*log_flushed)
Linus Torvalds's avatar
Linus Torvalds committed
397
{
398
399
400
401
402
	xlog_t		*log = mp->m_log;
	int		dummy;

	if (!log_flushed)
		log_flushed = &dummy;
Linus Torvalds's avatar
Linus Torvalds committed
403
404
405
406
407

	ASSERT(flags & XFS_LOG_FORCE);

	XFS_STATS_INC(xs_log_force);

408
409
410
411
412
413
	if (log->l_flags & XLOG_IO_ERROR)
		return XFS_ERROR(EIO);
	if (lsn == 0)
		return xlog_state_sync_all(log, flags, log_flushed);
	else
		return xlog_state_sync(log, lsn, flags, log_flushed);
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
}	/* _xfs_log_force */

/*
 * Convenience wrapper around _xfs_log_force() for callers that neither
 * need the error code nor care whether the log was actually flushed.  This
 * is the normal interface to use when trying to unpin items or move the
 * log forward.  A failure is only reported as a warning.
 */
void
xfs_log_force(
	xfs_mount_t	*mp,
	xfs_lsn_t	lsn,
	uint		flags)
{
	int	error = _xfs_log_force(mp, lsn, flags, NULL);

	if (!error)
		return;

	xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
		"error %d returned.", error);
}

Linus Torvalds's avatar
Linus Torvalds committed
435
436
437
438
439
440
441
442
443
444
445
446
447

/*
 * Attach a new iclog I/O completion callback during transaction commit.
 *
 * The callback is appended to the tail of the iclog's callback list under
 * ic_callback_lock.  If the iclog is in the IOERROR state nothing is
 * attached, a non-zero value is returned, and the caller is responsible
 * for running the callback at an appropriate time.
 */
int
xfs_log_notify(
	xfs_mount_t		*mp,		/* mount of partition */
	void			*iclog_hndl,	/* iclog to hang callback off */
	xfs_log_callback_t	*cb)
{
	xlog_in_core_t	*iclog = (xlog_in_core_t *)iclog_hndl;
	int		abortflg;

	spin_lock(&iclog->ic_callback_lock);

	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
	if (abortflg == 0) {
		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
			      (iclog->ic_state == XLOG_STATE_WANT_SYNC));

		/* link the new callback in at the tail of the list */
		cb->cb_next = NULL;
		*(iclog->ic_callback_tail) = cb;
		iclog->ic_callback_tail = &(cb->cb_next);
	}

	spin_unlock(&iclog->ic_callback_lock);

	return abortflg;
}	/* xfs_log_notify */

/*
 * Drop a reference on the iclog behind @iclog_hndl.
 *
 * If xlog_state_release_iclog() reports a failure, the filesystem is
 * force-shut-down with SHUTDOWN_LOG_IO_ERROR and EIO is returned;
 * otherwise 0.
 */
int
xfs_log_release_iclog(
	xfs_mount_t	*mp,
	void		*iclog_hndl)
{
	xlog_in_core_t	*iclog = (xlog_in_core_t *)iclog_hndl;
	xlog_t		*log = mp->m_log;

	if (!xlog_state_release_iclog(log, iclog))
		return 0;

	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
	return EIO;
}

/*
 *  1. Reserve an amount of on-disk log space and return a ticket
 *	corresponding to the reservation.
 *  2. Potentially, push buffers at tail of log to disk.
 *
 * Each reservation is going to reserve extra space for a log record header.
 * When writes happen to the on-disk log, we don't subtract the length of the
 * log record header from any reservation.  By wasting space in each
 * reservation, we prevent over allocation problems.
 *
 * If *@ticket is already set, it must belong to a permanent reservation
 * and write space is regranted for it.  Otherwise a new ticket is
 * obtained, tagged with @t_type, stored in *@ticket and log space is
 * granted for it.
 *
 * Returns 0 on success, EIO if the log is shut down, ENOMEM if no ticket
 * could be obtained, or the error from the grant routine.
 */
int
xfs_log_reserve(
	xfs_mount_t		*mp,
	int			unit_bytes,
	int			cnt,
	xfs_log_ticket_t	*ticket,
	__uint8_t		client,
	uint			flags,
	uint			t_type)
{
	xlog_t		*log = mp->m_log;
	xlog_ticket_t	*internal_ticket;

	ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
	ASSERT((flags & XFS_LOG_NOSLEEP) == 0);

	if (XLOG_FORCED_SHUTDOWN(log))
		return XFS_ERROR(EIO);

	XFS_STATS_INC(xs_try_logspace);

	if (*ticket != NULL) {
		/* re-reservation on an existing permanent ticket */
		ASSERT(flags & XFS_LOG_PERM_RESERV);
		internal_ticket = (xlog_ticket_t *)*ticket;
		xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
		xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
		return xlog_regrant_write_log_space(log, internal_ticket);
	}

	/* may sleep if need to allocate more tickets */
	internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, client, flags);
	if (internal_ticket == NULL)
		return XFS_ERROR(ENOMEM);

	internal_ticket->t_trans_type = t_type;
	*ticket = internal_ticket;
	xlog_trace_loggrant(log, internal_ticket,
		(internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
		"xfs_log_reserve: create new ticket (permanent trans)" :
		"xfs_log_reserve: create new ticket");

	/* push the AIL for the whole reservation: unit size times count */
	xlog_grant_push_ail(mp,
			    (internal_ticket->t_unit_res *
			     internal_ticket->t_cnt));

	return xlog_grant_log_space(log, internal_ticket);
}	/* xfs_log_reserve */


/*
 * Mount a log filesystem
 *
 * mp		- ubiquitous xfs mount point structure
 * log_target	- buftarg of on-disk log device
 * blk_offset	- Start block # where block size is 512 bytes (BBSIZE)
 * num_bblocks	- Number of BBSIZE blocks in on-disk log
 *
 * Return error or zero.
 */
int
548
549
550
551
552
xfs_log_mount(
	xfs_mount_t	*mp,
	xfs_buftarg_t	*log_target,
	xfs_daddr_t	blk_offset,
	int		num_bblks)
Linus Torvalds's avatar
Linus Torvalds committed
553
{
554
555
	int		error;

Linus Torvalds's avatar
Linus Torvalds committed
556
557
558
559
560
561
	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
		cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname);
	else {
		cmn_err(CE_NOTE,
			"!Mounting filesystem \"%s\" in no-recovery mode.  Filesystem will be inconsistent.",
			mp->m_fsname);
562
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
Linus Torvalds's avatar
Linus Torvalds committed
563
564
565
566
	}

	mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);

567
568
569
570
571
572
573
574
575
576
	/*
	 * Initialize the AIL now we have a log.
	 */
	spin_lock_init(&mp->m_ail_lock);
	error = xfs_trans_ail_init(mp);
	if (error) {
		cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
		goto error;
	}

Linus Torvalds's avatar
Linus Torvalds committed
577
578
579
580
581
	/*
	 * skip log recovery on a norecovery mount.  pretend it all
	 * just worked.
	 */
	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
582
		int	readonly = (mp->m_flags & XFS_MOUNT_RDONLY);
Linus Torvalds's avatar
Linus Torvalds committed
583
584

		if (readonly)
585
			mp->m_flags &= ~XFS_MOUNT_RDONLY;
Linus Torvalds's avatar
Linus Torvalds committed
586

587
		error = xlog_recover(mp->m_log);
Linus Torvalds's avatar
Linus Torvalds committed
588
589

		if (readonly)
590
			mp->m_flags |= XFS_MOUNT_RDONLY;
Linus Torvalds's avatar
Linus Torvalds committed
591
592
		if (error) {
			cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
593
			goto error;
Linus Torvalds's avatar
Linus Torvalds committed
594
595
596
597
598
599
600
601
		}
	}

	/* Normal transactions can now occur */
	mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;

	/* End mounting message in xfs_log_mount_finish */
	return 0;
602
603
604
error:
	xfs_log_unmount_dealloc(mp);
	return error;
Linus Torvalds's avatar
Linus Torvalds committed
605
606
607
608
609
610
611
612
613
614
615
}	/* xfs_log_mount */

/*
 * Finish the recovery of the file system.  This is separate from
 * the xfs_log_mount() call, because it depends on the code in
 * xfs_mountfs() to read in the root and real-time bitmap inodes
 * between calling xfs_log_mount() and here.
 *
 * mp		- ubiquitous xfs mount point structure
 */
int
Christoph Hellwig's avatar
Christoph Hellwig committed
616
xfs_log_mount_finish(xfs_mount_t *mp)
Linus Torvalds's avatar
Linus Torvalds committed
617
618
619
620
{
	int	error;

	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
Christoph Hellwig's avatar
Christoph Hellwig committed
621
		error = xlog_recover_finish(mp->m_log);
Linus Torvalds's avatar
Linus Torvalds committed
622
623
	else {
		error = 0;
624
		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
Linus Torvalds's avatar
Linus Torvalds committed
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
	}

	return error;
}

/*
 * Unmount processing for the log: write the unmount record, then free the
 * log structures.  The deallocation happens regardless of whether the
 * write succeeded; the write's result is what gets returned.
 */
int
xfs_log_unmount(
	xfs_mount_t	*mp)
{
	int	error = xfs_log_unmount_write(mp);

	xfs_log_unmount_dealloc(mp);
	return error;
}

/*
 * Final log writes as part of unmount.
 *
 * Mark the filesystem clean as unmount happens.  Note that during relocation
 * this routine needs to be executed as part of source-bag while the
 * deallocation must not be done until source-end.
 */

/*
 * Unmount record used to have a string "Unmount filesystem--" in the
 * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
 * We just write the magic number now since that particular field isn't
 * currently architecture converted and "nUmount" is a bit foo.
 * As far as I know, there weren't any dependencies on the old behaviour.
 */

int
xfs_log_unmount_write(
	xfs_mount_t	*mp)
{
	xlog_t		 *log = mp->m_log;
	xlog_in_core_t	 *iclog;
#ifdef DEBUG
	xlog_in_core_t	 *first_iclog;
#endif
	xfs_log_iovec_t  reg[1];
	xfs_log_ticket_t tic = NULL;
	xfs_lsn_t	 lsn;
	int		 error;

	/* the data section must be 32 bit size aligned */
	struct {
	    __uint16_t magic;
	    __uint16_t pad1;
	    __uint32_t pad2; /* may as well make it 64 bits */
	} magic = { XLOG_UNMOUNT_TYPE, 0, 0 };

	/*
	 * Don't write out unmount record on read-only mounts.
	 * Or, if we are doing a forced umount (typically because of IO errors).
	 */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	/* push everything out synchronously before the final record */
	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));

#ifdef DEBUG
	/* after the force every healthy iclog should be active and empty */
	first_iclog = iclog = log->l_iclog;
	do {
		if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
			ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
			ASSERT(iclog->ic_offset == 0);
		}
		iclog = iclog->ic_next;
	} while (iclog != first_iclog);
#endif

	if (!XLOG_FORCED_SHUTDOWN(log)) {
		reg[0].i_addr = (void*)&magic;
		reg[0].i_len  = sizeof(magic);
		XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);

		error = xfs_log_reserve(mp, 600, 1, &tic,
					XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
		if (!error) {
			/* remove inited flag */
			((xlog_ticket_t *)tic)->t_flags = 0;
			error = xlog_write(mp, reg, 1, tic, &lsn,
					   NULL, XLOG_UNMOUNT_TRANS);
			/*
			 * At this point, we're umounting anyway,
			 * so there's no point in transitioning log state
			 * to IOERROR. Just continue...
			 */
		}

		if (error) {
			xfs_fs_cmn_err(CE_ALERT, mp,
				"xfs_log_unmount: unmount record failed");
		}

		/* take a reference on the current iclog and push it out */
		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		atomic_inc(&iclog->ic_refcnt);
		spin_unlock(&log->l_icloglock);
		xlog_state_want_sync(log, iclog);
		error = xlog_state_release_iclog(log, iclog);

		/*
		 * Wait for the iclog unless it is already back to
		 * ACTIVE/DIRTY or the log has been shut down meanwhile.
		 * The trailing `s` argument to sv_wait() is reproduced
		 * from the original call; presumably sv_wait() is a macro
		 * that discards it - confirm before touching it.
		 */
		spin_lock(&log->l_icloglock);
		if (iclog->ic_state != XLOG_STATE_ACTIVE &&
		    iclog->ic_state != XLOG_STATE_DIRTY &&
		    !XLOG_FORCED_SHUTDOWN(log)) {
			sv_wait(&iclog->ic_force_wait, PMEM,
				&log->l_icloglock, s);
		} else {
			spin_unlock(&log->l_icloglock);
		}

		if (tic) {
			xlog_trace_loggrant(log, tic, "unmount rec");
			xlog_ungrant_log_space(log, tic);
			xlog_ticket_put(log, tic);
		}
	} else {
		/*
		 * We're already in forced_shutdown mode, couldn't
		 * even attempt to write out the unmount transaction.
		 *
		 * Go through the motions of sync'ing and releasing
		 * the iclog, even though no I/O will actually happen,
		 * we need to wait for other log I/Os that may already
		 * be in progress.  Do this as a separate section of
		 * code so we'll know if we ever get stuck here that
		 * we're in this odd situation of trying to unmount
		 * a file system that went into forced_shutdown as
		 * the result of an unmount..
		 */
		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		atomic_inc(&iclog->ic_refcnt);
		spin_unlock(&log->l_icloglock);

		xlog_state_want_sync(log, iclog);
		error =  xlog_state_release_iclog(log, iclog);

		spin_lock(&log->l_icloglock);
		if (iclog->ic_state != XLOG_STATE_ACTIVE &&
		    iclog->ic_state != XLOG_STATE_DIRTY &&
		    iclog->ic_state != XLOG_STATE_IOERROR) {
			sv_wait(&iclog->ic_force_wait, PMEM,
				&log->l_icloglock, s);
		} else {
			spin_unlock(&log->l_icloglock);
		}
	}

	return error;
}	/* xfs_log_unmount_write */

/*
 * Deallocate log structures for unmount/relocation.
 *
 * We need to stop the aild from running before we destroy
 * and deallocate the log as the aild references the log, so the
 * AIL is torn down first and the log second.
 */
void
xfs_log_unmount_dealloc(
	xfs_mount_t	*mp)
{
	xfs_trans_ail_destroy(mp);
	xlog_dealloc_log(mp->m_log);
}

/*
 * Write region vectors to log.  The write happens using the space reservation
 * of the ticket (tic).  It is not a requirement that all writes for a given
 * transaction occur with one call to xfs_log_write().
 *
 * Returns EIO (via XFS_ERROR) without writing if the log is shut down.  If
 * the write itself fails, the filesystem is force-shut-down with
 * SHUTDOWN_LOG_IO_ERROR and the write's error is returned.
 */
int
xfs_log_write(
	xfs_mount_t		*mp,
	xfs_log_iovec_t		reg[],
	int			nentries,
	xfs_log_ticket_t	tic,
	xfs_lsn_t		*start_lsn)
{
	xlog_t	*log = mp->m_log;
	int	error;

	if (XLOG_FORCED_SHUTDOWN(log))
		return XFS_ERROR(EIO);

	error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);

	return error;
}	/* xfs_log_write */


/*
 * Record a new tail LSN for the log and wake tickets waiting for space.
 *
 * @tail_lsn has two special values:
 *   0 - use log->l_last_sync_lsn (sampled under l_icloglock) as the tail;
 *   1 - no valid tail is being passed: l_tail_lsn is left alone, and the
 *       first waiter examined is signalled even if there is not enough
 *       free space for it.
 *
 * Under l_grant_lock, the write queue and then the reserve queue are
 * walked from their heads.  Waiters are signalled in order for as long as
 * the remaining free space covers their need; the walk stops at the first
 * one that does not fit.  Once any waiter has been signalled, tail_lsn is
 * reset to 0, which ends the "signal regardless" behaviour of value 1 for
 * the rest of this call (including the second queue).
 *
 * Does nothing if the log has been shut down.
 */
void
xfs_log_move_tail(
	xfs_mount_t	*mp,
	xfs_lsn_t	tail_lsn)
{
	xlog_t		*log = mp->m_log;
	xlog_ticket_t	*tic;
	int		need_bytes, free_bytes;

	if (XLOG_FORCED_SHUTDOWN(log))
		return;

	if (tail_lsn == 0) {
		/* needed since sync_lsn is 64 bits */
		spin_lock(&log->l_icloglock);
		tail_lsn = log->l_last_sync_lsn;
		spin_unlock(&log->l_icloglock);
	}

	spin_lock(&log->l_grant_lock);

	/* Also an invalid lsn.  1 implies that we aren't passing in a valid
	 * tail_lsn.
	 */
	if (tail_lsn != 1)
		log->l_tail_lsn = tail_lsn;

	/* waiters for write space */
	tic = log->l_write_headq;
	if (tic) {
#ifdef DEBUG
		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
			panic("Recovery problem");
#endif
		free_bytes = xlog_space_left(log,
					     log->l_grant_write_cycle,
					     log->l_grant_write_bytes);
		do {
			ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);

			if (free_bytes < tic->t_unit_res && tail_lsn != 1)
				break;
			tail_lsn = 0;
			free_bytes -= tic->t_unit_res;
			sv_signal(&tic->t_wait);
			tic = tic->t_next;
		} while (tic != log->l_write_headq);
	}

	/* waiters for reserve space */
	tic = log->l_reserve_headq;
	if (tic) {
#ifdef DEBUG
		if (log->l_flags & XLOG_ACTIVE_RECOVERY)
			panic("Recovery problem");
#endif
		free_bytes = xlog_space_left(log,
					     log->l_grant_reserve_cycle,
					     log->l_grant_reserve_bytes);
		do {
			/* permanent reservations need room for every unit */
			need_bytes = (tic->t_flags & XLOG_TIC_PERM_RESERV) ?
					tic->t_unit_res*tic->t_cnt :
					tic->t_unit_res;

			if (free_bytes < need_bytes && tail_lsn != 1)
				break;
			tail_lsn = 0;
			free_bytes -= need_bytes;
			sv_signal(&tic->t_wait);
			tic = tic->t_next;
		} while (tic != log->l_reserve_headq);
	}

	spin_unlock(&log->l_grant_lock);
}	/* xfs_log_move_tail */

/*
 * Decide whether a transaction that has gone to disk still needs to be
 * covered.  That is the case only when the log is idle (nothing in the AIL
 * and nothing in the iclogs) and the covered state says something has gone
 * out (XLOG_STATE_COVER_NEED or XLOG_STATE_COVER_NEED2).
 *
 * When covering is needed the state advances to the matching
 * XLOG_STATE_COVER_DONE / XLOG_STATE_COVER_DONE2 value and 1 is returned.
 * Otherwise, or when the filesystem is not writable, 0 is returned.
 */
int
xfs_log_need_covered(xfs_mount_t *mp)
{
	xlog_t		*log = mp->m_log;
	int		needed = 0;
	int		gen;

	if (!xfs_fs_writable(mp))
		return 0;

	spin_lock(&log->l_icloglock);

	if (log->l_covered_state != XLOG_STATE_COVER_NEED &&
	    log->l_covered_state != XLOG_STATE_COVER_NEED2)
		goto out_unlock;

	/* the log has to be idle: empty AIL and empty iclogs */
	if (xfs_trans_first_ail(mp, &gen) || !xlog_iclogs_empty(log))
		goto out_unlock;

	if (log->l_covered_state == XLOG_STATE_COVER_NEED) {
		log->l_covered_state = XLOG_STATE_COVER_DONE;
	} else {
		ASSERT(log->l_covered_state == XLOG_STATE_COVER_NEED2);
		log->l_covered_state = XLOG_STATE_COVER_DONE2;
	}
	needed = 1;

out_unlock:
	spin_unlock(&log->l_icloglock);
	return needed;
}

/******************************************************************************
 *
 *	local routines
 *
 ******************************************************************************
 */

/*
 * Pick the new log tail and store it in log->l_tail_lsn.
 *
 * xfs_trans_tail_ail() returns 0 when there is nothing in the list.  The
 * log manager must keep track of the last LR which was committed to disk;
 * the lsn of that LR (l_last_sync_lsn) becomes the new tail whenever
 * xfs_trans_tail_ail() returns 0.  Without this, stuff could be written
 * into the log while nothing was ever in the AIL when asked, and we would
 * eventually panic because the tail hits the head.
 *
 * We may be holding the log iclog lock upon entering this routine.
 *
 * Returns the lsn that was stored as the tail.
 */
xfs_lsn_t
xlog_assign_tail_lsn(xfs_mount_t *mp)
{
	xlog_t		*log = mp->m_log;
	xfs_lsn_t	tail_lsn = xfs_trans_tail_ail(mp);

	spin_lock(&log->l_grant_lock);
	if (tail_lsn == 0)
		tail_lsn = log->l_last_sync_lsn;
	log->l_tail_lsn = tail_lsn;
	spin_unlock(&log->l_grant_lock);

	return tail_lsn;
}	/* xlog_assign_tail_lsn */


/*
 * Return the space in the log between the tail and the head.  The head
 * is passed in the cycle/bytes formal parms.  In the special case where
 * the reserve head has wrapped passed the tail, this calculation is no
 * longer valid.  In this case, just return 0 which means there is no space
 * in the log.  This works for all places where this function is called
 * with the reserve head.  Of course, if the write head were to ever
 * wrap the tail, we should blow up.  Rather than catch this case here,
 * we depend on other ASSERTions in other parts of the code.   XXXmiken
 *
 * This code also handles the case where the reservation head is behind
 * the tail.  The details of this case are described below, but the end
 * result is that we return the size of the log as the amount of space left.
 */
David Chinner's avatar
David Chinner committed
976
STATIC int
Linus Torvalds's avatar
Linus Torvalds committed
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
xlog_space_left(xlog_t *log, int cycle, int bytes)
{
	int free_bytes;
	int tail_bytes;
	int tail_cycle;

	tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
	tail_cycle = CYCLE_LSN(log->l_tail_lsn);
	if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
		free_bytes = log->l_logsize - (bytes - tail_bytes);
	} else if ((tail_cycle + 1) < cycle) {
		return 0;
	} else if (tail_cycle < cycle) {
		ASSERT(tail_cycle == (cycle - 1));
		free_bytes = tail_bytes - bytes;
	} else {
		/*
		 * The reservation head is behind the tail.
		 * In this case we just want to return the size of the
		 * log as the amount of space left.
		 */
		xfs_fs_cmn_err(CE_ALERT, log->l_mp,
			"xlog_space_left: head behind tail\n"
			"  tail_cycle = %d, tail_bytes = %d\n"
			"  GH   cycle = %d, GH   bytes = %d",
			tail_cycle, tail_bytes, cycle, bytes);
		ASSERT(0);
		free_bytes = log->l_logsize;
	}
	return free_bytes;
}	/* xlog_space_left */


/*
 * Completion routine run when a log buffer I/O finishes.
 *
 * The log manager installs its own routine so that it controls what
 * happens with the buffer after the write completes.  The iclog is
 * recovered from the buffer's private pointer, barrier failures and I/O
 * errors are dealt with, and the result is passed on to
 * xlog_state_done_syncing().
 */
void
xlog_iodone(xfs_buf_t *bp)
{
	xlog_in_core_t	*iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
	xlog_t		*log;
	int		aborted = 0;

	/* FSPRIVATE2 is 2 on a buffer under I/O; put it back to 1 */
	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);

	/*
	 * Some versions of cpp barf on the recursive definition of
	 * ic_log -> hic_fields.ic_log and expand ic_log twice when
	 * it is passed through two macros.  Work around the broken cpp
	 * by fetching the pointer into a local first.
	 */
	log = iclog->ic_log;

	/*
	 * If the _XFS_BARRIER_FAILED flag was set by a lower
	 * layer, it means the underlying device no longer supports
	 * barrier I/O. Warn loudly and turn off barriers.
	 */
	if (bp->b_flags & _XFS_BARRIER_FAILED) {
		bp->b_flags &= ~_XFS_BARRIER_FAILED;
		log->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
		xfs_fs_cmn_err(CE_WARN, log->l_mp,
				"xlog_iodone: Barriers are no longer supported"
				" by device. Disabling barriers\n");
		xfs_buftrace("XLOG_IODONE BARRIERS OFF", bp);
	}

	/*
	 * Race to shutdown the filesystem if we see an error.
	 */
	if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), log->l_mp,
			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
		xfs_ioerror_alert("xlog_iodone", log->l_mp, bp,
				  XFS_BUF_ADDR(bp));
		XFS_BUF_STALE(bp);
		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
		/*
		 * This flag will be propagated to the trans-committed
		 * callback routines to let them know that the log-commit
		 * didn't succeed.
		 */
		aborted = XFS_LI_ABORTED;
	} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
		/* the iclog was already marked in error */
		aborted = XFS_LI_ABORTED;
	}

	/* log I/O is always issued ASYNC */
	ASSERT(XFS_BUF_ISASYNC(bp));
	xlog_state_done_syncing(iclog, aborted);
	/*
	 * do not reference the buffer (bp) here as we could race
	 * with it being freed after writing the unmount record to the
	 * log.
	 */
}	/* xlog_iodone */

/*
 * The bdstrat callback function for log bufs. This gives us a central
 * place to trap bufs in case we get hit by a log I/O error and need to
 * shutdown. Actually, in practice, even when we didn't get a log error,
 * we transition the iclogs to IOERROR state *after* flushing all existing
 * iclogs to disk. This is because we don't want any more new transactions
 * to be started or completed afterwards.
 *
 * Returns 0 after handing the buffer to XFS_bdstrat().  If the owning iclog
 * is in XLOG_STATE_IOERROR, the buffer is instead failed with EIO, marked
 * stale and completed, and EIO is returned (through XFS_ERROR).
 */
STATIC int
xlog_bdstrat_cb(struct xfs_buf *bp)
{
	xlog_in_core_t *iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);

	if (iclog->ic_state & XLOG_STATE_IOERROR) {
		xfs_buftrace("XLOG__BDSTRAT IOERROR", bp);
		XFS_BUF_ERROR(bp, EIO);
		XFS_BUF_STALE(bp);
		xfs_biodone(bp);
		return XFS_ERROR(EIO);
	}

	/*
	 * note for irix bstrat will need  struct bdevsw passed
	 * Fix the following macro if the code ever is merged
	 */
	XFS_bdstrat(bp);
	return 0;
}

/*
 * Choose the number and size of the in-core log record buffers.
 *
 * The buffer count is mp->m_logbufs when that is positive and
 * XLOG_MAX_ICLOGS otherwise.  The buffer size is mp->m_logbsize when that
 * is positive; otherwise XLOG_BIG_RECORD_BSIZE is used with a single
 * BBSIZE header.  In that default-size case, when no buffer count was
 * given either, filesystems with 16KB, 32KB and 64KB blocks get 3, 4 and
 * 8 buffers respectively.
 *
 * If the filesystem blocksize is too large, we may need to choose a
 * larger size since the directory code currently logs entire blocks.
 *
 * Before returning, m_logbufs and m_logbsize are filled in with the
 * selected values if they were zero.
 */
STATIC void
xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
			   xlog_t	*log)
{
	int size;
	int xhdrs;

	log->l_iclog_bufs = (mp->m_logbufs <= 0) ? XLOG_MAX_ICLOGS
						  : mp->m_logbufs;

	if (mp->m_logbsize > 0) {
		/*
		 * Buffer size passed in from mount system call.  Count the
		 * shifts needed to bring it down to 1 to get its log2.
		 */
		log->l_iclog_size = mp->m_logbsize;
		log->l_iclog_size_log = 0;
		for (size = log->l_iclog_size; size != 1; size >>= 1)
			log->l_iclog_size_log++;

		if (xfs_sb_version_haslogv2(&mp->m_sb)) {
			/*
			 * # headers = size / 32K, rounded up;
			 * one header holds cycles from 32K of data
			 */
			xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
			if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
				xhdrs++;
			log->l_iclog_hsize = xhdrs << BBSHIFT;
			log->l_iclog_heads = xhdrs;
		} else {
			ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
			log->l_iclog_hsize = BBSIZE;
			log->l_iclog_heads = 1;
		}
	} else {
		/* All machines use 32KB buffers by default. */
		log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
		log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;

		/* the default log size is 16k or 32k: one header sector */
		log->l_iclog_hsize = BBSIZE;
		log->l_iclog_heads = 1;

		/*
		 * For 16KB, we use 3 32KB buffers.  For 32KB block sizes,
		 * we use 4 32KB buffers.  For 64KB block sizes, we use 8
		 * 32KB buffers.  An explicit buffer count wins over this.
		 */
		if (mp->m_sb.sb_blocksize >= 16*1024 && mp->m_logbufs <= 0) {
			switch (mp->m_sb.sb_blocksize) {
			case 16*1024:			/* 16 KB */
				log->l_iclog_bufs = 3;
				break;
			case 32*1024:			/* 32 KB */
				log->l_iclog_bufs = 4;
				break;
			case 64*1024:			/* 64 KB */
				log->l_iclog_bufs = 8;
				break;
			default:
				xlog_panic("XFS: Invalid blocksize");
				break;
			}
		}
	}

	/* are we being asked to make the sizes selected above visible? */
	if (mp->m_logbufs == 0)
		mp->m_logbufs = log->l_iclog_bufs;
	if (mp->m_logbsize == 0)
		mp->m_logbsize = log->l_iclog_size;
}	/* xlog_get_iclog_buffer_size */


/*
 * Allocate and initialise the log structure for a mount point.  The main
 * goal is to fill in enough for recovery to run, although some other
 * fields are set up here as well.
 *
 * mp		- mount the log belongs to
 * log_target	- buffer target stored in l_targ
 * blk_offset	- first basic block of the log, stored in l_logBBstart
 * num_bblks	- size of the log in basic blocks
 *
 * Besides the xlog_t itself this allocates the extra buffer l_xbuf and a
 * circular, doubly linked ring of l_iclog_bufs in-core logs, each with
 * its own buffer.  The log starts out flagged XLOG_ACTIVE_RECOVERY.
 *
 * Returns the new log.
 */
STATIC xlog_t *
xlog_alloc_log(xfs_mount_t	*mp,
	       xfs_buftarg_t	*log_target,
	       xfs_daddr_t	blk_offset,
	       int		num_bblks)
{
	xlog_t			*log;
	xlog_rec_header_t	*hdr;
	xlog_in_core_t		**iclogp;
	xlog_in_core_t		*iclog;
	xlog_in_core_t		*prev = NULL;
	xfs_buf_t		*bp;
	int			i;

	log = (xlog_t *)kmem_zalloc(sizeof(*log), KM_SLEEP);

	log->l_mp = mp;
	log->l_targ = log_target;
	log->l_logsize = BBTOB(num_bblks);
	log->l_logBBstart = blk_offset;
	log->l_logBBsize = num_bblks;
	log->l_covered_state = XLOG_STATE_COVER_IDLE;
	log->l_flags |= XLOG_ACTIVE_RECOVERY;

	log->l_prev_block = -1;
	/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
	log->l_tail_lsn = xlog_assign_lsn(1, 0);
	log->l_last_sync_lsn = log->l_tail_lsn;
	/* 0 is bad for the cycle since this is the initial value */
	log->l_curr_cycle = 1;
	log->l_grant_reserve_cycle = 1;
	log->l_grant_write_cycle = 1;

	if (xfs_sb_version_hassector(&mp->m_sb)) {
		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
		/* for larger sector sizes, must have v2 or external log */
		ASSERT(log->l_sectbb_log == 0 ||
			log->l_logBBstart == 0 ||
			xfs_sb_version_haslogv2(&mp->m_sb));
		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
	}
	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;

	xlog_get_iclog_buffer_size(mp, log);

	/* extra buffer kept in l_xbuf, set up like the iclog buffers */
	bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
	XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
	XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	log->l_xbuf = bp;

	spin_lock_init(&log->l_icloglock);
	spin_lock_init(&log->l_grant_lock);
	sv_init(&log->l_flush_wait, 0, "flush_wait");

	xlog_trace_loggrant_alloc(log);
	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);

	/*
	 * Build the ring of in-core logs.  iclogp always points at the
	 * link that has to receive the next iclog: first log->l_iclog,
	 * then the ic_next field of the iclog created last.
	 */
	iclogp = &log->l_iclog;
	ASSERT(log->l_iclog_size >= 4096);
	for (i = 0; i < log->l_iclog_bufs; i++) {
		iclog = (xlog_in_core_t *)
			kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
		*iclogp = iclog;
		iclog->ic_prev = prev;
		prev = iclog;

		bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
		if (!XFS_BUF_CPSEMA(bp))
			ASSERT(0);
		XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
		XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
		iclog->ic_bp = bp;
		iclog->hic_data = bp->b_addr;
#ifdef DEBUG
		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
#endif

		/* fill in the record header fields that are fixed up front */
		hdr = &iclog->ic_header;
		memset(hdr, 0, sizeof(xlog_rec_header_t));
		hdr->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
		hdr->h_version = cpu_to_be32(
			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
		hdr->h_size = cpu_to_be32(log->l_iclog_size);
		/* new fields */
		hdr->h_fmt = cpu_to_be32(XLOG_FMT);
		memcpy(&hdr->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));

		/* data space is the buffer minus the record header(s) */
		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
		iclog->ic_state = XLOG_STATE_ACTIVE;
		iclog->ic_log = log;
		atomic_set(&iclog->ic_refcnt, 0);
		spin_lock_init(&iclog->ic_callback_lock);
		iclog->ic_callback_tail = &(iclog->ic_callback);
		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;

		ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
		ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
		sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
		sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");

		xlog_trace_iclog_alloc(iclog);

		iclogp = &iclog->ic_next;
	}
	*iclogp = log->l_iclog;			/* complete ring */
	log->l_iclog->ic_prev = prev;		/* re-write 1st prev ptr */

	return log;
}	/* xlog_alloc_log */


/*
 * Write out the commit record of the transaction associated with the
 * given ticket.  The record is a single empty region written with the
 * XLOG_COMMIT_TRANS flag.
 *
 * commitlsnp and iclog are handed straight to xlog_write(); iclog must
 * not be NULL.  Returns the result of xlog_write(); a non-zero result
 * also forces a filesystem shutdown with SHUTDOWN_LOG_IO_ERROR.
 */
STATIC int
xlog_commit_record(xfs_mount_t  *mp,
		   xlog_ticket_t *ticket,
		   xlog_in_core_t **iclog,
		   xfs_lsn_t	*commitlsnp)
{
	xfs_log_iovec_t	reg[1];
	int		error;

	/* the commit record carries no payload */
	reg[0].i_addr = NULL;
	reg[0].i_len = 0;
	XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);

	ASSERT_ALWAYS(iclog);
	error = xlog_write(mp, reg, 1, ticket, commitlsnp,
			   iclog, XLOG_COMMIT_TRANS);
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);

	return error;
}	/* xlog_commit_record */


/*
 * Push on the buffer cache code if we ever use more than 75% of the on-disk
 * log space.  This code pushes on the lsn which would supposedly free up
 * the 25% which we want to leave free.  We may need to adopt a policy which
 * pushes on an lsn which is further along in the log once we reach the high
 * water mark.  In this manner, we would be creating a low water mark.
 */
David Chinner's avatar
David Chinner committed
1362
STATIC void
Linus Torvalds's avatar
Linus Torvalds committed
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
xlog_grant_push_ail(xfs_mount_t	*mp,
		    int		need_bytes)
{
    xlog_t	*log = mp->m_log;	/* pointer to the log */
    xfs_lsn_t	tail_lsn;		/* lsn of the log tail */
    xfs_lsn_t	threshold_lsn = 0;	/* lsn we'd like to be at */
    int		free_blocks;		/* free blocks left to write to */
    int		free_bytes;		/* free bytes left to write to */
    int		threshold_block;	/* block in lsn we'd like to be at */
    int		threshold_cycle;	/* lsn cycle we'd like to be at */
    int		free_threshold;

    ASSERT(BTOBB(need_bytes) < log->l_logBBsize);

Eric Sandeen's avatar
Eric Sandeen committed
1377
    spin_lock(&log->l_grant_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
    free_bytes = xlog_space_left(log,
				 log->l_grant_reserve_cycle,
				 log->l_grant_reserve_bytes);
    tail_lsn = log->l_tail_lsn;
    free_blocks = BTOBBT(free_bytes);

    /*
     * Set the threshold for the minimum number of free blocks in the
     * log to the maximum of what the caller needs, one quarter of the
     * log, and 256 blocks.
     */
    free_threshold = BTOBB(need_bytes);
    free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
    free_threshold = MAX(free_threshold, 256);
    if (free_blocks < free_threshold) {
	threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
	threshold_cycle = CYCLE_LSN(tail_lsn);
	if (threshold_block >= log->l_logBBsize) {
	    threshold_block -= log->l_logBBsize;
	    threshold_cycle += 1;
	}
1399
	threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
Linus Torvalds's avatar
Linus Torvalds committed
1400
1401
1402
1403
1404
1405
1406

	/* Don't pass in an lsn greater than the lsn of the last
	 * log record known to be on disk.
	 */
	if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
	    threshold_lsn = log->l_last_sync_lsn;
    }
Eric Sandeen's avatar
Eric Sandeen committed
1407
    spin_unlock(&log->l_grant_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444

    /*
     * Get the transaction layer to kick the dirty buffers out to
     * disk asynchronously. No point in trying to do this if
     * the filesystem is shutting down.
     */
    if (threshold_lsn &&
	!XLOG_FORCED_SHUTDOWN(log))
	    xfs_trans_push_ail(mp, threshold_lsn);
}	/* xlog_grant_push_ail */


/*
 * Flush out the in-core log (iclog) to the on-disk log in an asynchronous 
 * fashion.  Previously, we should have moved the current iclog
 * ptr in the log to point to the next available iclog.  This allows further
 * write to continue while this code syncs out an iclog ready to go.
 * Before an in-core log can be written out, the data section must be scanned
 * to save away the 1st word of each BBSIZE block into the header.  We replace
 * it with the current cycle count.  Each BBSIZE block is tagged with the
 * cycle count because there in an implicit assumption that drives will
 * guarantee that entire 512 byte blocks get written at once.  In other words,
 * we can't have part of a 512 byte block written and part not written.  By
 * tagging each block, we will know which blocks are valid when recovering
 * after an unclean shutdown.
 *
 * This routine is single threaded on the iclog.  No other thread can be in
 * this routine with the same iclog.  Changing contents of iclog can there-
 * fore be done without grabbing the state machine lock.  Updating the global
 * log will require grabbing the lock though.
 *
 * The entire log manager uses a logical block numbering scheme.  Only
 * log_sync (and then only bwrite()) know about the fact that the log may
 * not start with block zero on a given device.  The log block start offset
 * is added immediately before calling bwrite().
 */

David Chinner's avatar
David Chinner committed
1445
STATIC int
Linus Torvalds's avatar
Linus Torvalds committed
1446
1447
1448
1449
1450
xlog_sync(xlog_t		*log,
	  xlog_in_core_t	*iclog)
{
	xfs_caddr_t	dptr;		/* pointer to byte sized element */
	xfs_buf_t	*bp;
1451
	int		i;
Linus Torvalds's avatar
Linus Torvalds committed
1452
1453
1454
1455
1456
	uint		count;		/* byte count of bwrite */
	uint		count_init;	/* initial count before roundup */
	int		roundoff;       /* roundoff to BB or stripe */
	int		split = 0;	/* split write into two regions */
	int		error;
1457
	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
Linus Torvalds's avatar
Linus Torvalds committed
1458
1459

	XFS_STATS_INC(xs_log_writes);
1460
	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
Linus Torvalds's avatar
Linus Torvalds committed
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480

	/* Add for LR header */
	count_init = log->l_iclog_hsize + iclog->ic_offset;

	/* Round out the log write size */
	if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
		/* we have a v2 stripe unit to use */
		count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
	} else {
		count = BBTOB(BTOBB(count_init));
	}
	roundoff = count - count_init;
	ASSERT(roundoff >= 0);
	ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && 
                roundoff < log->l_mp->m_sb.sb_logsunit)
		|| 
		(log->l_mp->m_sb.sb_logsunit <= 1 && 
		 roundoff < BBTOB(1)));

	/* move grant heads by roundoff in sync */
Eric Sandeen's avatar
Eric Sandeen committed
1481
	spin_lock(&log->l_grant_lock);
1482
	xlog_grant_add_space(log, roundoff);
Eric Sandeen's avatar
Eric Sandeen committed
1483
	spin_unlock(&log->l_grant_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1484
1485
1486
1487
1488
1489

	/* put cycle number in every block */
	xlog_pack_data(log, iclog, roundoff); 

	/* real byte length */
	if (v2) {
1490
1491
		iclog->ic_header.h_len =
			cpu_to_be32(iclog->ic_offset + roundoff);
Linus Torvalds's avatar
Linus Torvalds committed
1492
	} else {
1493
1494
		iclog->ic_header.h_len =
			cpu_to_be32(iclog->ic_offset);
Linus Torvalds's avatar
Linus Torvalds committed
1495
1496
	}

1497
	bp = iclog->ic_bp;
Linus Torvalds's avatar
Linus Torvalds committed
1498
1499
	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1500
	XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
Linus Torvalds's avatar
Linus Torvalds committed
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511

	XFS_STATS_ADD(xs_log_blocks, BTOBB(count));

	/* Do we need to split this write into 2 parts? */
	if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
		split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
		count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
		iclog->ic_bwritecnt = 2;	/* split into 2 writes */
	} else {
		iclog->ic_bwritecnt = 1;
	}
1512
	XFS_BUF_SET_COUNT(bp, count);
Linus Torvalds's avatar
Linus Torvalds committed
1513
	XFS_BUF_SET_FSPRIVATE(bp, iclog);	/* save for later */
1514
	XFS_BUF_ZEROFLAGS(bp);
Linus Torvalds's avatar
Linus Torvalds committed
1515
1516
1517
	XFS_BUF_BUSY(bp);
	XFS_BUF_ASYNC(bp);
	/*
1518
	 * Do an ordered write for the log block.
1519
	 * Its unnecessary to flush the first split block in the log wrap case.
Linus Torvalds's avatar
Linus Torvalds committed
1520
	 */
1521
	if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
1522
		XFS_BUF_ORDERED(bp);
Linus Torvalds's avatar
Linus Torvalds committed
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539

	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
	ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

	xlog_verify_iclog(log, iclog, count, B_TRUE);

	/* account for log which doesn't start at block #0 */
	XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
	/*
	 * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
	 * is shutting down.
	 */
	XFS_BUF_WRITE(bp);

	if ((error = XFS_bwrite(bp))) {
		xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
				  XFS_BUF_ADDR(bp));
1540
		return error;
Linus Torvalds's avatar
Linus Torvalds committed
1541
1542
	}
	if (split) {
1543
		bp = iclog->ic_log->l_xbuf;
Linus Torvalds's avatar
Linus Torvalds committed
1544
1545
1546
1547
1548
1549
1550
		ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
							(unsigned long)1);
		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
		XFS_BUF_SET_ADDR(bp, 0);	     /* logical 0 */
		XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
					    (__psint_t)count), split);
		XFS_BUF_SET_FSPRIVATE(bp, iclog);
1551
		XFS_BUF_ZEROFLAGS(bp);
Linus Torvalds's avatar
Linus Torvalds committed
1552
1553
		XFS_BUF_BUSY(bp);
		XFS_BUF_ASYNC(bp);
1554
1555
		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
			XFS_BUF_ORDERED(bp);
Linus Torvalds's avatar
Linus Torvalds committed
1556
1557
1558
1559
1560
1561
1562
		dptr = XFS_BUF_PTR(bp);
		/*
		 * Bump the cycle numbers at the start of each block
		 * since this part of the buffer is at the start of
		 * a new cycle.  Watch out for the header magic number
		 * case, though.
		 */
1563
		for (i = 0; i < split; i += BBSIZE) {
1564
			be32_add_cpu((__be32 *)dptr, 1);
1565
			if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM)
1566
				be32_add_cpu((__be32 *)dptr, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1567
1568
1569
1570
1571
1572
			dptr += BBSIZE;
		}

		ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
		ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);

1573
		/* account for internal log which doesn't start at block #0 */
Linus Torvalds's avatar
Linus Torvalds committed
1574
1575
1576
1577
1578
		XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
		XFS_BUF_WRITE(bp);
		if ((error = XFS_bwrite(bp))) {
			xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
					  bp, XFS_BUF_ADDR(bp));
1579
			return error;
Linus Torvalds's avatar
Linus Torvalds committed
1580
1581
		}
	}
1582
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1583
1584
1585
1586
}	/* xlog_sync */


/*
1587
 * Deallocate a log structure
Linus Torvalds's avatar
Linus Torvalds committed
1588
 */
David Chinner's avatar
David Chinner committed
1589
STATIC void
1590
xlog_dealloc_log(xlog_t *log)
Linus Torvalds's avatar
Linus Torvalds committed
1591
1592
1593
1594
1595
1596
{
	xlog_in_core_t	*iclog, *next_iclog;
	int		i;

	iclog = log->l_iclog;
	for (i=0; i<log->l_iclog_bufs; i++) {
1597
1598
		sv_destroy(&iclog->ic_force_wait);
		sv_destroy(&iclog->ic_write_wait);
Linus Torvalds's avatar
Linus Torvalds committed
1599
		xfs_buf_free(iclog->ic_bp);
1600
		xlog_trace_iclog_dealloc(iclog);
Linus Torvalds's avatar
Linus Torvalds committed
1601
		next_iclog = iclog->ic_next;
1602
		kmem_free(iclog);
Linus Torvalds's avatar
Linus Torvalds committed
1603
1604
1605
1606
1607
1608
		iclog = next_iclog;
	}
	spinlock_destroy(&log->l_icloglock);
	spinlock_destroy(&log->l_grant_lock);

	xfs_buf_free(log->l_xbuf);
1609
	xlog_trace_loggrant_dealloc(log);
Linus Torvalds's avatar
Linus Torvalds committed
1610
	log->l_mp->m_log = NULL;
1611
	kmem_free(log);
1612
}	/* xlog_dealloc_log */
Linus Torvalds's avatar
Linus Torvalds committed
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623

/*
 * Account for data that has just been copied into an iclog.
 *
 * With l_icloglock held, advances the iclog's write offset by copy_bytes
 * and adds record_cnt to the big-endian h_num_logops count in the iclog
 * header.
 */
/* ARGSUSED */
static inline void
xlog_state_finish_copy(xlog_t		*log,
		       xlog_in_core_t	*iclog,
		       int		record_cnt,
		       int		copy_bytes)
{
	spin_lock(&log->l_icloglock);
	iclog->ic_offset += copy_bytes;
	be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
	spin_unlock(&log->l_icloglock);
}	/* xlog_state_finish_copy */