PSARC 2009/571 ZFS Deduplication Properties
author Jeff Bonwick <Jeff.Bonwick@Sun.COM>
Sun Nov 01 14:14:46 2009 -0800 (3 months ago)
changeset 10922:e2081f502306
parent 10921:8aac17999e4d
child 10923:df470fd79c3c
PSARC 2009/571 ZFS Deduplication Properties
6677093 zfs should have dedup capability
usr/src/cmd/filebench/Makefile.com
usr/src/cmd/mdb/common/modules/zfs/zfs.c
usr/src/cmd/sgs/Makefile.var
usr/src/cmd/zdb/Makefile.com
usr/src/cmd/zdb/zdb.c
usr/src/cmd/zdb/zdb_il.c
usr/src/cmd/zpool/zpool_main.c
usr/src/cmd/ztest/ztest.c
usr/src/common/avl/avl.c
usr/src/common/zfs/zfs_fletcher.c
usr/src/common/zfs/zfs_prop.c
usr/src/common/zfs/zfs_prop.h
usr/src/common/zfs/zpool_prop.c
usr/src/common/zfs/zprop_common.c
usr/src/grub/capability
usr/src/grub/grub-0.97/stage2/fsys_zfs.c
usr/src/grub/grub-0.97/stage2/zfs-include/spa.h
usr/src/grub/grub-0.97/stage2/zfs-include/zap_impl.h
usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
usr/src/lib/libzfs/common/libzfs_dataset.c
usr/src/lib/libzfs/common/libzfs_pool.c
usr/src/lib/libzpool/common/llib-lzpool
usr/src/uts/common/Makefile.files
usr/src/uts/common/fs/vfs.c
usr/src/uts/common/fs/zfs/arc.c
usr/src/uts/common/fs/zfs/bplist.c
usr/src/uts/common/fs/zfs/dbuf.c
usr/src/uts/common/fs/zfs/ddt.c
usr/src/uts/common/fs/zfs/ddt_zap.c
usr/src/uts/common/fs/zfs/dmu.c
usr/src/uts/common/fs/zfs/dmu_objset.c
usr/src/uts/common/fs/zfs/dmu_send.c
usr/src/uts/common/fs/zfs/dmu_traverse.c
usr/src/uts/common/fs/zfs/dmu_tx.c
usr/src/uts/common/fs/zfs/dnode_sync.c
usr/src/uts/common/fs/zfs/dsl_dataset.c
usr/src/uts/common/fs/zfs/dsl_deleg.c
usr/src/uts/common/fs/zfs/dsl_dir.c
usr/src/uts/common/fs/zfs/dsl_pool.c
usr/src/uts/common/fs/zfs/dsl_prop.c
usr/src/uts/common/fs/zfs/dsl_scrub.c
usr/src/uts/common/fs/zfs/dsl_synctask.c
usr/src/uts/common/fs/zfs/lzjb.c
usr/src/uts/common/fs/zfs/metaslab.c
usr/src/uts/common/fs/zfs/sha256.c
usr/src/uts/common/fs/zfs/spa.c
usr/src/uts/common/fs/zfs/spa_history.c
usr/src/uts/common/fs/zfs/spa_misc.c
usr/src/uts/common/fs/zfs/space_map.c
usr/src/uts/common/fs/zfs/sys/arc.h
usr/src/uts/common/fs/zfs/sys/bplist.h
usr/src/uts/common/fs/zfs/sys/dbuf.h
usr/src/uts/common/fs/zfs/sys/ddt.h
usr/src/uts/common/fs/zfs/sys/dmu.h
usr/src/uts/common/fs/zfs/sys/dmu_objset.h
usr/src/uts/common/fs/zfs/sys/dmu_traverse.h
usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
usr/src/uts/common/fs/zfs/sys/dsl_pool.h
usr/src/uts/common/fs/zfs/sys/metaslab.h
usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
usr/src/uts/common/fs/zfs/sys/spa.h
usr/src/uts/common/fs/zfs/sys/spa_impl.h
usr/src/uts/common/fs/zfs/sys/txg.h
usr/src/uts/common/fs/zfs/sys/txg_impl.h
usr/src/uts/common/fs/zfs/sys/uberblock.h
usr/src/uts/common/fs/zfs/sys/vdev.h
usr/src/uts/common/fs/zfs/sys/vdev_impl.h
usr/src/uts/common/fs/zfs/sys/zap.h
usr/src/uts/common/fs/zfs/sys/zap_impl.h
usr/src/uts/common/fs/zfs/sys/zap_leaf.h
usr/src/uts/common/fs/zfs/sys/zil.h
usr/src/uts/common/fs/zfs/sys/zil_impl.h
usr/src/uts/common/fs/zfs/sys/zio.h
usr/src/uts/common/fs/zfs/sys/zio_checksum.h
usr/src/uts/common/fs/zfs/sys/zio_compress.h
usr/src/uts/common/fs/zfs/sys/zio_impl.h
usr/src/uts/common/fs/zfs/txg.c
usr/src/uts/common/fs/zfs/vdev.c
usr/src/uts/common/fs/zfs/vdev_mirror.c
usr/src/uts/common/fs/zfs/vdev_queue.c
usr/src/uts/common/fs/zfs/vdev_raidz.c
usr/src/uts/common/fs/zfs/zap.c
usr/src/uts/common/fs/zfs/zap_leaf.c
usr/src/uts/common/fs/zfs/zap_micro.c
usr/src/uts/common/fs/zfs/zfs_fm.c
usr/src/uts/common/fs/zfs/zfs_ioctl.c
usr/src/uts/common/fs/zfs/zfs_log.c
usr/src/uts/common/fs/zfs/zfs_replay.c
usr/src/uts/common/fs/zfs/zfs_vnops.c
usr/src/uts/common/fs/zfs/zil.c
usr/src/uts/common/fs/zfs/zio.c
usr/src/uts/common/fs/zfs/zio_checksum.c
usr/src/uts/common/fs/zfs/zio_compress.c
usr/src/uts/common/fs/zfs/zio_inject.c
usr/src/uts/common/fs/zfs/zle.c
usr/src/uts/common/fs/zfs/zvol.c
usr/src/uts/common/sys/avl.h
usr/src/uts/common/sys/fs/zfs.h
usr/src/uts/intel/zfs/spa_boot.c
usr/src/uts/sparc/zfs/spa_boot.c
       1 --- a/usr/src/cmd/filebench/Makefile.com	Fri Oct 30 18:47:17 2009 -0600
       2 +++ b/usr/src/cmd/filebench/Makefile.com	Sun Nov 01 14:14:46 2009 -0800
       3 @@ -51,9 +51,9 @@
       4  ROOTFBBINDIR = $(ROOT)/usr/benchmarks/filebench/bin
       5  OBJS = $(SRCS:%.c=%.o) parser_gram.o parser_lex.o
       6  LINTFLAGS += -erroff=E_FUNC_ARG_UNUSED -erroff=E_NAME_DEF_NOT_USED2 \
       7 -	-erroff=E_NAME_USED_NOT_DEF2
       8 +	-erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_DECL2
       9  LINTFLAGS64 += -erroff=E_FUNC_ARG_UNUSED -erroff=E_NAME_DEF_NOT_USED2 \
      10 -	-erroff=E_NAME_USED_NOT_DEF2
      11 +	-erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_DECL2
      12  LINTFILES = $(SRCS:%.c=%.ln)
      13  CLEANFILES += parser_gram.c parser_gram.h parser_lex.c y.tab.h y.tab.c
      14  
     1.1 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Fri Oct 30 18:47:17 2009 -0600
     1.2 +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c	Sun Nov 01 14:14:46 2009 -0800
     1.3 @@ -35,7 +35,6 @@
     1.4  #include <sys/list.h>
     1.5  #include <sys/spa_impl.h>
     1.6  #include <sys/vdev_impl.h>
     1.7 -#include <sys/zio_compress.h>
     1.8  #include <ctype.h>
     1.9  
    1.10  #ifndef _KERNEL
    1.11 @@ -47,15 +46,6 @@
    1.12  #else
    1.13  #define	ZFS_OBJ_NAME	"libzpool.so.1"
    1.14  #endif
    1.15 -
    1.16 -static char *
    1.17 -local_strdup(const char *s)
    1.18 -{
    1.19 -	char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
    1.20 -
    1.21 -	(void) strcpy(s1, s);
    1.22 -	return (s1);
    1.23 -}
    1.24  
    1.25  static int
    1.26  getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
    1.27 @@ -128,27 +118,6 @@
    1.28  	off /= 8;
    1.29  
    1.30  	return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
    1.31 -}
    1.32 -
    1.33 -static int
    1.34 -read_symbol(char *sym_name, void **bufp)
    1.35 -{
    1.36 -	GElf_Sym sym;
    1.37 -
    1.38 -	if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
    1.39 -		mdb_warn("can't find symbol %s", sym_name);
    1.40 -		return (DCMD_ERR);
    1.41 -	}
    1.42 -
    1.43 -	*bufp = mdb_alloc(sym.st_size, UM_SLEEP);
    1.44 -
    1.45 -	if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
    1.46 -		mdb_warn("can't read data for symbol %s", sym_name);
    1.47 -		mdb_free(*bufp, sym.st_size);
    1.48 -		return (DCMD_ERR);
    1.49 -	}
    1.50 -
    1.51 -	return (DCMD_OK);
    1.52  }
    1.53  
    1.54  static int verbose;
    1.55 @@ -305,30 +274,6 @@
    1.56  
    1.57  /* ARGSUSED */
    1.58  static int
    1.59 -zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    1.60 -{
    1.61 -	mdb_ctf_id_t pipe_enum;
    1.62 -	int i;
    1.63 -	char stage[1024];
    1.64 -
    1.65 -	if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
    1.66 -		mdb_warn("Could not find enum zio_stage");
    1.67 -		return (DCMD_ERR);
    1.68 -	}
    1.69 -
    1.70 -	for (i = 0; i < 32; i++) {
    1.71 -		if (addr & (1U << i)) {
    1.72 -			enum_lookup(stage, sizeof (stage), pipe_enum, i,
    1.73 -			    "ZIO_STAGE_");
    1.74 -			mdb_printf("    %s\n", stage);
    1.75 -		}
    1.76 -	}
    1.77 -
    1.78 -	return (DCMD_OK);
    1.79 -}
    1.80 -
    1.81 -/* ARGSUSED */
    1.82 -static int
    1.83  zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
    1.84  {
    1.85  	/*
    1.86 @@ -351,9 +296,8 @@
    1.87  		"metaslab_aliquot",
    1.88  		"reference_tracking_enable",
    1.89  		"reference_history",
    1.90 -		"zio_taskq_threads",
    1.91  		"spa_max_replication_override",
    1.92 -		"spa_mode",
    1.93 +		"spa_mode_global",
    1.94  		"zfs_flags",
    1.95  		"zfs_txg_synctime",
    1.96  		"zfs_txg_timeout",
    1.97 @@ -383,9 +327,8 @@
    1.98  		"zio_injection_enabled",
    1.99  		"zvol_immediate_write_sz",
   1.100  	};
   1.101 -	int i;
   1.102  
   1.103 -	for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
   1.104 +	for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
   1.105  		int sz;
   1.106  		uint64_t val64;
   1.107  		uint32_t *val32p = (uint32_t *)&val64;
   1.108 @@ -407,76 +350,33 @@
   1.109  static int
   1.110  blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
   1.111  {
   1.112 -	blkptr_t bp;
   1.113 -	dmu_object_type_info_t *doti;
   1.114 -	zio_compress_info_t *zct;
   1.115 -	zio_checksum_info_t *zci;
   1.116 -	int i;
   1.117 -	char buf[MAXPATHLEN];
   1.118 +	mdb_ctf_id_t type_enum, checksum_enum, compress_enum;
   1.119 +	char type[80], checksum[80], compress[80];
   1.120 +	blkptr_t blk, *bp = &blk;
   1.121 +	char buf[BP_SPRINTF_LEN];
   1.122  
   1.123 -	if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
   1.124 +	if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
   1.125  		mdb_warn("failed to read blkptr_t");
   1.126  		return (DCMD_ERR);
   1.127  	}
   1.128  
   1.129 -	if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
   1.130 +	if (mdb_ctf_lookup_by_name("enum dmu_object_type", &type_enum) == -1 ||
   1.131 +	    mdb_ctf_lookup_by_name("enum zio_checksum", &checksum_enum) == -1 ||
   1.132 +	    mdb_ctf_lookup_by_name("enum zio_compress", &compress_enum) == -1) {
   1.133 +		mdb_warn("Could not find blkptr enumerated types");
   1.134  		return (DCMD_ERR);
   1.135 -	for (i = 0; i < DMU_OT_NUMTYPES; i++) {
   1.136 -		mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
   1.137 -		doti[i].ot_name = local_strdup(buf);
   1.138  	}
   1.139  
   1.140 -	if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
   1.141 -		return (DCMD_ERR);
   1.142 -	for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
   1.143 -		mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
   1.144 -		zci[i].ci_name = local_strdup(buf);
   1.145 -	}
   1.146 +	enum_lookup(type, sizeof (type), type_enum,
   1.147 +	    BP_GET_TYPE(bp), "DMU_OT_");
   1.148 +	enum_lookup(checksum, sizeof (checksum), checksum_enum,
   1.149 +	    BP_GET_CHECKSUM(bp), "ZIO_CHECKSUM_");
   1.150 +	enum_lookup(compress, sizeof (compress), compress_enum,
   1.151 +	    BP_GET_COMPRESS(bp), "ZIO_COMPRESS_");
   1.152  
   1.153 -	if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
   1.154 -		return (DCMD_ERR);
   1.155 -	for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
   1.156 -		mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
   1.157 -		zct[i].ci_name = local_strdup(buf);
   1.158 -	}
   1.159 +	SPRINTF_BLKPTR(mdb_snprintf, '\n', buf, bp, type, checksum, compress);
   1.160  
   1.161 -	/*
   1.162 -	 * Super-ick warning:  This code is also duplicated in
   1.163 -	 * cmd/zdb.c .   Yeah, I hate code replication, too.
   1.164 -	 */
   1.165 -	for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
   1.166 -		dva_t *dva = &bp.blk_dva[i];
   1.167 -
   1.168 -		mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
   1.169 -		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
   1.170 -		mdb_printf("DVA[%d]:       GANG: %-5s  GRID:  %04x\t"
   1.171 -		    "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
   1.172 -		    (int)DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
   1.173 -		mdb_printf("DVA[%d]: %llu:%llx:%llx:%s%s%s%s\n", i,
   1.174 -		    DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
   1.175 -		    BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
   1.176 -		    !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
   1.177 -		    DVA_GET_GANG(dva) ? "g" : "",
   1.178 -		    BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
   1.179 -	}
   1.180 -	mdb_printf("LSIZE:  %-16llx\t\tPSIZE: %llx\n",
   1.181 -	    BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
   1.182 -	mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
   1.183 -	    BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
   1.184 -	    BP_GET_TYPE(&bp) < DMU_OT_NUMTYPES ?
   1.185 -	    doti[BP_GET_TYPE(&bp)].ot_name : "UNKNOWN");
   1.186 -	mdb_printf("BIRTH:  %-16llx   LEVEL: %-2d\tFILL:  %llx\n",
   1.187 -	    bp.blk_birth, (int)BP_GET_LEVEL(&bp), bp.blk_fill);
   1.188 -	mdb_printf("CKFUNC: %-16s\t\tCOMP:  %s\n",
   1.189 -	    BP_GET_CHECKSUM(&bp) < ZIO_CHECKSUM_FUNCTIONS ?
   1.190 -	    zci[BP_GET_CHECKSUM(&bp)].ci_name : "UNKNOWN",
   1.191 -	    BP_GET_COMPRESS(&bp) < ZIO_COMPRESS_FUNCTIONS ?
   1.192 -	    zct[BP_GET_COMPRESS(&bp)].ci_name : "UNKNOWN");
   1.193 -	mdb_printf("CKSUM:  %llx:%llx:%llx:%llx\n",
   1.194 -	    bp.blk_cksum.zc_word[0],
   1.195 -	    bp.blk_cksum.zc_word[1],
   1.196 -	    bp.blk_cksum.zc_word[2],
   1.197 -	    bp.blk_cksum.zc_word[3]);
   1.198 +	mdb_printf("%s\n", buf);
   1.199  
   1.200  	return (DCMD_OK);
   1.201  }
   1.202 @@ -2293,7 +2193,6 @@
   1.203  	    "zio_t summary", zio_print },
   1.204  	{ "zio_state", "?", "print out all zio_t structures on system or "
   1.205  	    "for a particular pool", zio_state },
   1.206 -	{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
   1.207  	{ "zfs_blkstats", ":[-v]",
   1.208  	    "given a spa_t, print block type stats from last scrub",
   1.209  	    zfs_blkstats },
     2.1 --- a/usr/src/cmd/sgs/Makefile.var	Fri Oct 30 18:47:17 2009 -0600
     2.2 +++ b/usr/src/cmd/sgs/Makefile.var	Sun Nov 01 14:14:46 2009 -0800
     2.3 @@ -75,7 +75,7 @@
     2.4  # the system.
     2.5  #
     2.6  VAR_AVLDIR=		$(SRCBASE)/common/avl
     2.7 -VAR_AVLINCDIR=
     2.8 +VAR_AVLINCDIR=		-I $(SRCBASE)/uts/common
     2.9  
    2.10  #
    2.11  # VAR_DTRDIR - directory to find dtrace_data.c in.
     3.1 --- a/usr/src/cmd/zdb/Makefile.com	Fri Oct 30 18:47:17 2009 -0600
     3.2 +++ b/usr/src/cmd/zdb/Makefile.com	Sun Nov 01 14:14:46 2009 -0800
     3.3 @@ -33,6 +33,7 @@
     3.4  
     3.5  INCS += -I../../../lib/libzpool/common 
     3.6  INCS +=	-I../../../uts/common/fs/zfs
     3.7 +INCS +=	-I../../../common/zfs
     3.8  
     3.9  LDLIBS += -lzpool -lumem -lnvpair -lzfs -lavl
    3.10  
     4.1 --- a/usr/src/cmd/zdb/zdb.c	Fri Oct 30 18:47:17 2009 -0600
     4.2 +++ b/usr/src/cmd/zdb/zdb.c	Sun Nov 01 14:14:46 2009 -0800
     4.3 @@ -51,6 +51,7 @@
     4.4  #include <sys/zio_compress.h>
     4.5  #include <sys/zfs_fuid.h>
     4.6  #include <sys/arc.h>
     4.7 +#include <sys/ddt.h>
     4.8  #undef ZFS_MAXNAMELEN
     4.9  #undef verify
    4.10  #include <libzfs.h>
    4.11 @@ -72,8 +73,6 @@
    4.12  uint64_t *zopt_object = NULL;
    4.13  int zopt_objects = 0;
    4.14  libzfs_handle_t *g_zfs;
    4.15 -boolean_t zdb_sig_user_data = B_TRUE;
    4.16 -int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
    4.17  
    4.18  /*
    4.19   * These libumem hooks provide a reasonable set of defaults for the allocator's
    4.20 @@ -121,8 +120,7 @@
    4.21  	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
    4.22  	    "all data) blocks\n");
    4.23  	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
    4.24 -	(void) fprintf(stderr, "        -S <user|all>:<cksum_alg|all> -- "
    4.25 -	    "dump blkptr signatures\n");
    4.26 +	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
    4.27  	(void) fprintf(stderr, "        -v verbose (applies to all others)\n");
    4.28  	(void) fprintf(stderr, "        -l dump label contents\n");
    4.29  	(void) fprintf(stderr, "        -L disable leak tracking (do not "
    4.30 @@ -540,6 +538,198 @@
    4.31  }
    4.32  
    4.33  static void
    4.34 +dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
    4.35 +{
    4.36 +	const ddt_phys_t *ddp = dde->dde_phys;
    4.37 +	const ddt_key_t *ddk = &dde->dde_key;
    4.38 +	char *types[4] = { "ditto", "single", "double", "triple" };
    4.39 +	char blkbuf[BP_SPRINTF_LEN];
    4.40 +	blkptr_t blk;
    4.41 +
    4.42 +	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
    4.43 +		if (ddp->ddp_phys_birth == 0)
    4.44 +			continue;
    4.45 +		ddt_bp_create(ddt, ddk, ddp, &blk);
    4.46 +		sprintf_blkptr(blkbuf, &blk);
    4.47 +		(void) printf("index %llx refcnt %llu %s %s\n",
    4.48 +		    (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
    4.49 +		    types[p], blkbuf);
    4.50 +	}
    4.51 +}
    4.52 +
    4.53 +static void
    4.54 +dump_dedup_ratio(const ddt_stat_t *dds)
    4.55 +{
    4.56 +	double rL, rP, rD, D, dedup, compress, copies;
    4.57 +
    4.58 +	if (dds->dds_blocks == 0)
    4.59 +		return;
    4.60 +
    4.61 +	rL = (double)dds->dds_ref_lsize;
    4.62 +	rP = (double)dds->dds_ref_psize;
    4.63 +	rD = (double)dds->dds_ref_dsize;
    4.64 +	D = (double)dds->dds_dsize;
    4.65 +
    4.66 +	dedup = rD / D;
    4.67 +	compress = rL / rP;
    4.68 +	copies = rD / rP;
    4.69 +
    4.70 +	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
    4.71 +	    "dedup * compress / copies = %.2f\n\n",
    4.72 +	    dedup, compress, copies, dedup * compress / copies);
    4.73 +}
    4.74 +
    4.75 +static void
    4.76 +dump_ddt_stat(const ddt_stat_t *dds, int h)
    4.77 +{
    4.78 +	char refcnt[6];
    4.79 +	char blocks[6], lsize[6], psize[6], dsize[6];
    4.80 +	char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
    4.81 +
    4.82 +	if (dds->dds_blocks == 0)
    4.83 +		return;
    4.84 +
    4.85 +	if (h == -1)
    4.86 +		(void) strcpy(refcnt, "Total");
    4.87 +	else
    4.88 +		nicenum(1ULL << h, refcnt);
    4.89 +
    4.90 +	nicenum(dds->dds_blocks, blocks);
    4.91 +	nicenum(dds->dds_lsize, lsize);
    4.92 +	nicenum(dds->dds_psize, psize);
    4.93 +	nicenum(dds->dds_dsize, dsize);
    4.94 +	nicenum(dds->dds_ref_blocks, ref_blocks);
    4.95 +	nicenum(dds->dds_ref_lsize, ref_lsize);
    4.96 +	nicenum(dds->dds_ref_psize, ref_psize);
    4.97 +	nicenum(dds->dds_ref_dsize, ref_dsize);
    4.98 +
    4.99 +	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
   4.100 +	    refcnt,
   4.101 +	    blocks, lsize, psize, dsize,
   4.102 +	    ref_blocks, ref_lsize, ref_psize, ref_dsize);
   4.103 +}
   4.104 +
   4.105 +static void
   4.106 +dump_ddt_histogram(const ddt_histogram_t *ddh)
   4.107 +{
   4.108 +	ddt_stat_t dds_total = { 0 };
   4.109 +
   4.110 +	ddt_histogram_stat(&dds_total, ddh);
   4.111 +
   4.112 +	(void) printf("\n");
   4.113 +
   4.114 +	(void) printf("bucket   "
   4.115 +	    "           allocated             "
   4.116 +	    "          referenced          \n");
   4.117 +	(void) printf("______   "
   4.118 +	    "______________________________   "
   4.119 +	    "______________________________\n");
   4.120 +
   4.121 +	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
   4.122 +	    "refcnt",
   4.123 +	    "blocks", "LSIZE", "PSIZE", "DSIZE",
   4.124 +	    "blocks", "LSIZE", "PSIZE", "DSIZE");
   4.125 +
   4.126 +	(void) printf("%6s   %6s   %5s   %5s   %5s   %6s   %5s   %5s   %5s\n",
   4.127 +	    "------",
   4.128 +	    "------", "-----", "-----", "-----",
   4.129 +	    "------", "-----", "-----", "-----");
   4.130 +
   4.131 +	for (int h = 0; h < 64; h++)
   4.132 +		dump_ddt_stat(&ddh->ddh_stat[h], h);
   4.133 +
   4.134 +	dump_ddt_stat(&dds_total, -1);
   4.135 +
   4.136 +	(void) printf("\n");
   4.137 +}
   4.138 +
   4.139 +static void
   4.140 +dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
   4.141 +{
   4.142 +	char name[DDT_NAMELEN];
   4.143 +	ddt_entry_t dde;
   4.144 +	uint64_t walk = 0;
   4.145 +	dmu_object_info_t doi;
   4.146 +	uint64_t count, dspace, mspace;
   4.147 +	int error;
   4.148 +
   4.149 +	error = ddt_object_info(ddt, type, class, &doi);
   4.150 +
   4.151 +	if (error == ENOENT)
   4.152 +		return;
   4.153 +	ASSERT(error == 0);
   4.154 +
   4.155 +	count = ddt_object_count(ddt, type, class);
   4.156 +	dspace = doi.doi_physical_blocks_512 << 9;
   4.157 +	mspace = doi.doi_fill_count * doi.doi_data_block_size;
   4.158 +
   4.159 +	ASSERT(count != 0);	/* we should have destroyed it */
   4.160 +
   4.161 +	ddt_object_name(ddt, type, class, name);
   4.162 +
   4.163 +	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
   4.164 +	    name,
   4.165 +	    (u_longlong_t)count,
   4.166 +	    (u_longlong_t)(dspace / count),
   4.167 +	    (u_longlong_t)(mspace / count));
   4.168 +
   4.169 +	if (dump_opt['D'] < 3)
   4.170 +		return;
   4.171 +
   4.172 +	dump_ddt_histogram(&ddt->ddt_histogram[type][class]);
   4.173 +
   4.174 +	if (dump_opt['D'] < 4)
   4.175 +		return;
   4.176 +
   4.177 +	if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
   4.178 +		return;
   4.179 +
   4.180 +	(void) printf("%s contents:\n\n", name);
   4.181 +
   4.182 +	while ((error = ddt_object_walk(ddt, type, class, &dde, &walk)) == 0)
   4.183 +		dump_dde(ddt, &dde, walk);
   4.184 +
   4.185 +	ASSERT(error == ENOENT);
   4.186 +
   4.187 +	(void) printf("\n");
   4.188 +}
   4.189 +
   4.190 +static void
   4.191 +dump_all_ddts(spa_t *spa)
   4.192 +{
   4.193 +	ddt_histogram_t ddh_total = { 0 };
   4.194 +	ddt_stat_t dds_total = { 0 };
   4.195 +
   4.196 +	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
   4.197 +		ddt_t *ddt = spa->spa_ddt[c];
   4.198 +		for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
   4.199 +			for (enum ddt_class class = 0; class < DDT_CLASSES;
   4.200 +			    class++) {
   4.201 +				ddt_histogram_add(&ddh_total,
   4.202 +				    &ddt->ddt_histogram[type][class]);
   4.203 +				dump_ddt(ddt, type, class);
   4.204 +			}
   4.205 +		}
   4.206 +	}
   4.207 +
   4.208 +	ddt_histogram_stat(&dds_total, &ddh_total);
   4.209 +
   4.210 +	if (dds_total.dds_blocks == 0) {
   4.211 +		(void) printf("All DDTs are empty\n");
   4.212 +		return;
   4.213 +	}
   4.214 +
   4.215 +	(void) printf("\n");
   4.216 +
   4.217 +	if (dump_opt['D'] > 1) {
   4.218 +		(void) printf("DDT histogram (aggregated over all DDTs):\n");
   4.219 +		dump_ddt_histogram(&ddh_total);
   4.220 +	}
   4.221 +
   4.222 +	dump_dedup_ratio(&dds_total);
   4.223 +}
   4.224 +
   4.225 +static void
   4.226  dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
   4.227  {
   4.228  	char *prefix = (void *)sm;
   4.229 @@ -658,35 +848,48 @@
   4.230  }
   4.231  
   4.232  static uint64_t
   4.233 -blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
   4.234 +blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
   4.235  {
   4.236 -	if (level < 0)
   4.237 -		return (blkid);
   4.238 +	if (dnp == NULL) {
   4.239 +		ASSERT(zb->zb_level < 0);
   4.240 +		if (zb->zb_object == 0)
   4.241 +			return (zb->zb_blkid);
   4.242 +		return (zb->zb_blkid * BP_GET_LSIZE(bp));
   4.243 +	}
   4.244  
   4.245 -	return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
   4.246 +	ASSERT(zb->zb_level >= 0);
   4.247 +
   4.248 +	return ((zb->zb_blkid <<
   4.249 +	    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
   4.250  	    dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
   4.251  }
   4.252  
   4.253  static void
   4.254 -sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
   4.255 +sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
   4.256  {
   4.257  	dva_t *dva = bp->blk_dva;
   4.258 -	int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
   4.259 -	int i;
   4.260 +	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
   4.261 +
   4.262 +	if (dump_opt['b'] >= 5) {
   4.263 +		sprintf_blkptr(blkbuf, bp);
   4.264 +		return;
   4.265 +	}
   4.266  
   4.267  	blkbuf[0] = '\0';
   4.268  
   4.269 -	for (i = 0; i < ndvas; i++)
   4.270 +	for (int i = 0; i < ndvas; i++)
   4.271  		(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
   4.272  		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
   4.273  		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
   4.274  		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
   4.275  
   4.276 -	(void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
   4.277 +	(void) sprintf(blkbuf + strlen(blkbuf),
   4.278 +	    "%llxL/%llxP F=%llu B=%llu/%llu",
   4.279  	    (u_longlong_t)BP_GET_LSIZE(bp),
   4.280  	    (u_longlong_t)BP_GET_PSIZE(bp),
   4.281  	    (u_longlong_t)bp->blk_fill,
   4.282 -	    (u_longlong_t)bp->blk_birth);
   4.283 +	    (u_longlong_t)bp->blk_birth,
   4.284 +	    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
   4.285  }
   4.286  
   4.287  static void
   4.288 @@ -699,8 +902,7 @@
   4.289  	ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
   4.290  	ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
   4.291  
   4.292 -	(void) printf("%16llx ",
   4.293 -	    (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
   4.294 +	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
   4.295  
   4.296  	ASSERT(zb->zb_level >= 0);
   4.297  
   4.298 @@ -712,16 +914,8 @@
   4.299  		}
   4.300  	}
   4.301  
   4.302 -	sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
   4.303 +	sprintf_blkptr_compact(blkbuf, bp);
   4.304  	(void) printf("%s\n", blkbuf);
   4.305 -}
   4.306 -
   4.307 -#define	SET_BOOKMARK(zb, objset, object, level, blkid)  \
   4.308 -{                                                       \
   4.309 -	(zb)->zb_objset = objset;                       \
   4.310 -	(zb)->zb_object = object;                       \
   4.311 -	(zb)->zb_level = level;                         \
   4.312 -	(zb)->zb_blkid = blkid;                         \
   4.313  }
   4.314  
   4.315  static int
   4.316 @@ -859,7 +1053,7 @@
   4.317  	nicenum(ds->ds_compressed_bytes, compressed);
   4.318  	nicenum(ds->ds_uncompressed_bytes, uncompressed);
   4.319  	nicenum(ds->ds_unique_bytes, unique);
   4.320 -	sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
   4.321 +	sprintf_blkptr(blkbuf, &ds->ds_bp);
   4.322  
   4.323  	(void) printf("\t\tdir_obj = %llu\n",
   4.324  	    (u_longlong_t)ds->ds_dir_obj);
   4.325 @@ -910,11 +1104,11 @@
   4.326  	if (dump_opt['d'] < 3)
   4.327  		return;
   4.328  
   4.329 -	mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
   4.330 +	bplist_init(&bpl);
   4.331  	VERIFY(0 == bplist_open(&bpl, mos, object));
   4.332  	if (bplist_empty(&bpl)) {
   4.333  		bplist_close(&bpl);
   4.334 -		mutex_destroy(&bpl.bpl_lock);
   4.335 +		bplist_fini(&bpl);
   4.336  		return;
   4.337  	}
   4.338  
   4.339 @@ -932,7 +1126,7 @@
   4.340  
   4.341  	if (dump_opt['d'] < 5) {
   4.342  		bplist_close(&bpl);
   4.343 -		mutex_destroy(&bpl.bpl_lock);
   4.344 +		bplist_fini(&bpl);
   4.345  		return;
   4.346  	}
   4.347  
   4.348 @@ -942,13 +1136,13 @@
   4.349  		char blkbuf[BP_SPRINTF_LEN];
   4.350  
   4.351  		ASSERT(bp->blk_birth != 0);
   4.352 -		sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
   4.353 +		sprintf_blkptr_compact(blkbuf, bp);
   4.354  		(void) printf("\tItem %3llu: %s\n",
   4.355  		    (u_longlong_t)itor - 1, blkbuf);
   4.356  	}
   4.357  
   4.358  	bplist_close(&bpl);
   4.359 -	mutex_destroy(&bpl.bpl_lock);
   4.360 +	bplist_fini(&bpl);
   4.361  }
   4.362  
   4.363  static avl_tree_t idx_tree;
   4.364 @@ -1107,6 +1301,8 @@
   4.365  	dump_zap,		/* ZFS user/group used		*/
   4.366  	dump_zap,		/* ZFS user/group quota		*/
   4.367  	dump_zap,		/* snapshot refcount tags	*/
   4.368 +	dump_none,		/* DDT ZAP object		*/
   4.369 +	dump_zap,		/* DDT statistics		*/
   4.370  	dump_unknown		/* Unknown type, must be last	*/
   4.371  };
   4.372  
   4.373 @@ -1118,13 +1314,14 @@
   4.374  	dnode_t *dn;
   4.375  	void *bonus = NULL;
   4.376  	size_t bsize = 0;
   4.377 -	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
   4.378 +	char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
   4.379  	char aux[50];
   4.380  	int error;
   4.381  
   4.382  	if (*print_header) {
   4.383 -		(void) printf("\n    Object  lvl   iblk   dblk  lsize"
   4.384 -		    "  asize  type\n");
   4.385 +		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
   4.386 +		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
   4.387 +		    "%full", "type");
   4.388  		*print_header = 0;
   4.389  	}
   4.390  
   4.391 @@ -1143,10 +1340,11 @@
   4.392  
   4.393  	nicenum(doi.doi_metadata_block_size, iblk);
   4.394  	nicenum(doi.doi_data_block_size, dblk);
   4.395 -	nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
   4.396 -	    lsize);
   4.397 -	nicenum(doi.doi_physical_blks << 9, asize);
   4.398 +	nicenum(doi.doi_max_offset, lsize);
   4.399 +	nicenum(doi.doi_physical_blocks_512 << 9, asize);
   4.400  	nicenum(doi.doi_bonus_size, bonus_size);
   4.401 +	(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
   4.402 +	    doi.doi_data_block_size / doi.doi_max_offset);
   4.403  
   4.404  	aux[0] = '\0';
   4.405  
   4.406 @@ -1160,13 +1358,13 @@
   4.407  		    ZDB_COMPRESS_NAME(doi.doi_compress));
   4.408  	}
   4.409  
   4.410 -	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %s%s\n",
   4.411 -	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
   4.412 -	    asize, ZDB_OT_NAME(doi.doi_type), aux);
   4.413 +	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
   4.414 +	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
   4.415 +	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
   4.416  
   4.417  	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
   4.418 -		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %s\n",
   4.419 -		    "", "", "", "", bonus_size, "bonus",
   4.420 +		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
   4.421 +		    "", "", "", "", "", bonus_size, "bonus",
   4.422  		    ZDB_OT_NAME(doi.doi_bonus_type));
   4.423  	}
   4.424  
   4.425 @@ -1203,6 +1401,7 @@
   4.426  		}
   4.427  
   4.428  		for (;;) {
   4.429 +			char segsize[6];
   4.430  			error = dnode_next_offset(dn,
   4.431  			    0, &start, minlvl, blkfill, 0);
   4.432  			if (error)
   4.433 @@ -1261,8 +1460,7 @@
   4.434  
   4.435  	if (verbosity >= 4) {
   4.436  		(void) sprintf(blkbuf, ", rootbp ");
   4.437 -		(void) sprintf_blkptr(blkbuf + strlen(blkbuf),
   4.438 -		    BP_SPRINTF_LEN - strlen(blkbuf), os->os_rootbp);
   4.439 +		(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
   4.440  	} else {
   4.441  		blkbuf[0] = '\0';
   4.442  	}
   4.443 @@ -1275,7 +1473,16 @@
   4.444  	    (u_longlong_t)dds.dds_creation_txg,
   4.445  	    numbuf, (u_longlong_t)usedobjs, blkbuf);
   4.446  
   4.447 -	dump_intent_log(dmu_objset_zil(os));
   4.448 +	if (zopt_objects != 0) {
   4.449 +		for (i = 0; i < zopt_objects; i++)
   4.450 +			dump_object(os, zopt_object[i], verbosity,
   4.451 +			    &print_header);
   4.452 +		(void) printf("\n");
   4.453 +		return;
   4.454 +	}
   4.455 +
   4.456 +	if (dump_opt['i'] != 0 || verbosity >= 2)
   4.457 +		dump_intent_log(dmu_objset_zil(os));
   4.458  
   4.459  	if (dmu_objset_ds(os) != NULL)
   4.460  		dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
   4.461 @@ -1286,14 +1493,6 @@
   4.462  
   4.463  	if (os->os_rootbp->blk_birth == 0)
   4.464  		return;
   4.465 -
   4.466 -	if (zopt_objects != 0) {
   4.467 -		for (i = 0; i < zopt_objects; i++)
   4.468 -			dump_object(os, zopt_object[i], verbosity,
   4.469 -			    &print_header);
   4.470 -		(void) printf("\n");
   4.471 -		return;
   4.472 -	}
   4.473  
   4.474  	dump_object(os, 0, verbosity, &print_header);
   4.475  	object_count = 0;
   4.476 @@ -1333,7 +1532,7 @@
   4.477  	    (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
   4.478  	if (dump_opt['u'] >= 3) {
   4.479  		char blkbuf[BP_SPRINTF_LEN];
   4.480 -		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
   4.481 +		sprintf_blkptr(blkbuf, &ub->ub_rootbp);
   4.482  		(void) printf("\trootbp = %s\n", blkbuf);
   4.483  	}
   4.484  	(void) printf("\n");
   4.485 @@ -1466,12 +1665,166 @@
   4.486  
   4.487  	error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
   4.488  	if (error) {
   4.489 -		(void) printf("Could not open %s\n", dsname);
   4.490 +		(void) printf("Could not open %s, error %d\n", dsname, error);
   4.491  		return (0);
   4.492  	}
   4.493  	dump_dir(os);
   4.494  	dmu_objset_disown(os, FTAG);
   4.495  	fuid_table_destroy();
   4.496 +	return (0);
   4.497 +}
   4.498 +
   4.499 +/*
   4.500 + * Block statistics.
   4.501 + */
   4.502 +typedef struct zdb_blkstats {
   4.503 +	uint64_t	zb_asize;
   4.504 +	uint64_t	zb_lsize;
   4.505 +	uint64_t	zb_psize;
   4.506 +	uint64_t	zb_count;
   4.507 +} zdb_blkstats_t;
   4.508 +
   4.509 +/*
   4.510 + * Extended object types to report deferred frees and dedup auto-ditto blocks.
   4.511 + */
   4.512 +#define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
   4.513 +#define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
   4.514 +#define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 2)
   4.515 +
   4.516 +static char *zdb_ot_extname[] = {
   4.517 +	"deferred free",
   4.518 +	"dedup ditto",
   4.519 +	"Total",
   4.520 +};
   4.521 +
   4.522 +#define	ZB_TOTAL	DN_MAX_LEVELS
   4.523 +
   4.524 +typedef struct zdb_cb {
   4.525 +	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
   4.526 +	uint64_t	zcb_dedup_asize;
   4.527 +	uint64_t	zcb_dedup_blocks;
   4.528 +	uint64_t	zcb_errors[256];
   4.529 +	int		zcb_readfails;
   4.530 +	int		zcb_haderrors;
   4.531 +} zdb_cb_t;
   4.532 +
   4.533 +static void
   4.534 +zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
   4.535 +    dmu_object_type_t type)
   4.536 +{
   4.537 +	uint64_t refcnt = 0;
   4.538 +
   4.539 +	ASSERT(type < ZDB_OT_TOTAL);
   4.540 +
   4.541 +	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
   4.542 +		return;
   4.543 +
   4.544 +	for (int i = 0; i < 4; i++) {
   4.545 +		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
   4.546 +		int t = (i & 1) ? type : ZDB_OT_TOTAL;
   4.547 +		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
   4.548 +
   4.549 +		zb->zb_asize += BP_GET_ASIZE(bp);
   4.550 +		zb->zb_lsize += BP_GET_LSIZE(bp);
   4.551 +		zb->zb_psize += BP_GET_PSIZE(bp);
   4.552 +		zb->zb_count++;
   4.553 +	}
   4.554 +
   4.555 +	if (dump_opt['L'])
   4.556 +		return;
   4.557 +
   4.558 +	if (BP_GET_DEDUP(bp)) {
   4.559 +		ddt_t *ddt;
   4.560 +		ddt_entry_t *dde;
   4.561 +
   4.562 +		ddt = ddt_select(spa, bp);
   4.563 +		ddt_enter(ddt);
   4.564 +		dde = ddt_lookup(ddt, bp, B_FALSE);
   4.565 +
   4.566 +		if (dde == NULL) {
   4.567 +			refcnt = 0;
   4.568 +		} else {
   4.569 +			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
   4.570 +			ddt_phys_decref(ddp);
   4.571 +			refcnt = ddp->ddp_refcnt;
   4.572 +			if (ddt_phys_total_refcnt(dde) == 0)
   4.573 +				ddt_remove(ddt, dde);
   4.574 +		}
   4.575 +		ddt_exit(ddt);
   4.576 +	}
   4.577 +
   4.578 +	VERIFY3U(zio_wait(zio_claim(NULL, spa,
   4.579 +	    refcnt ? 0 : spa_first_txg(spa),
   4.580 +	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
   4.581 +}
   4.582 +
   4.583 +static int
   4.584 +zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
   4.585 +    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
   4.586 +{
   4.587 +	zdb_cb_t *zcb = arg;
   4.588 +	char blkbuf[BP_SPRINTF_LEN];
   4.589 +	dmu_object_type_t type;
   4.590 +	boolean_t is_metadata;
   4.591 +
   4.592 +	if (bp == NULL)
   4.593 +		return (0);
   4.594 +
   4.595 +	type = BP_GET_TYPE(bp);
   4.596 +
   4.597 +	zdb_count_block(spa, zilog, zcb, bp, type);
   4.598 +
   4.599 +	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
   4.600 +
   4.601 +	if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
   4.602 +		int ioerr;
   4.603 +		size_t size = BP_GET_PSIZE(bp);
   4.604 +		void *data = malloc(size);
   4.605 +		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
   4.606 +
   4.607 +		/* If it's an intent log block, failure is expected. */
   4.608 +		if (zb->zb_level == ZB_ZIL_LEVEL)
   4.609 +			flags |= ZIO_FLAG_SPECULATIVE;
   4.610 +
   4.611 +		ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
   4.612 +		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
   4.613 +
   4.614 +		free(data);
   4.615 +
   4.616 +		if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
   4.617 +			zcb->zcb_haderrors = 1;
   4.618 +			zcb->zcb_errors[ioerr]++;
   4.619 +
   4.620 +			if (dump_opt['b'] >= 2)
   4.621 +				sprintf_blkptr(blkbuf, bp);
   4.622 +			else
   4.623 +				blkbuf[0] = '\0';
   4.624 +
   4.625 +			(void) printf("zdb_blkptr_cb: "
   4.626 +			    "Got error %d reading "
   4.627 +			    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
   4.628 +			    ioerr,
   4.629 +			    (u_longlong_t)zb->zb_objset,
   4.630 +			    (u_longlong_t)zb->zb_object,
   4.631 +			    (u_longlong_t)zb->zb_level,
   4.632 +			    (u_longlong_t)zb->zb_blkid,
   4.633 +			    blkbuf);
   4.634 +		}
   4.635 +	}
   4.636 +
   4.637 +	zcb->zcb_readfails = 0;
   4.638 +
   4.639 +	if (dump_opt['b'] >= 4) {
   4.640 +		sprintf_blkptr(blkbuf, bp);
   4.641 +		(void) printf("objset %llu object %llu "
   4.642 +		    "level %lld offset 0x%llx %s\n",
   4.643 +		    (u_longlong_t)zb->zb_objset,
   4.644 +		    (u_longlong_t)zb->zb_object,
   4.645 +		    (longlong_t)zb->zb_level,
   4.646 +		    (u_longlong_t)blkid2offset(dnp, bp, zb),
   4.647 +		    blkbuf);
   4.648 +	}
   4.649 +
   4.650  	return (0);
   4.651  }
   4.652  
   4.653 @@ -1512,169 +1865,90 @@
   4.654  };
   4.655  
   4.656  static void
   4.657 -zdb_leak_init(spa_t *spa)
   4.658 +zdb_ddt_leak_init(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
   4.659 +    zdb_cb_t *zcb)
   4.660  {
   4.661 -	vdev_t *rvd = spa->spa_root_vdev;
   4.662 +	uint64_t walk = 0;
   4.663 +	ddt_entry_t dde;
   4.664 +	int error;
   4.665  
   4.666 -	for (int c = 0; c < rvd->vdev_children; c++) {
   4.667 -		vdev_t *vd = rvd->vdev_child[c];
   4.668 -		for (int m = 0; m < vd->vdev_ms_count; m++) {
   4.669 -			metaslab_t *msp = vd->vdev_ms[m];
   4.670 -			mutex_enter(&msp->ms_lock);
   4.671 -			VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
   4.672 -			    SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
   4.673 -			msp->ms_map.sm_ppd = vd;
   4.674 -			mutex_exit(&msp->ms_lock);
   4.675 +	if (class == DDT_CLASS_UNIQUE || !ddt_object_exists(ddt, type, class))
   4.676 +		return;
   4.677 +
   4.678 +	while ((error = ddt_object_walk(ddt, type, class, &dde, &walk)) == 0) {
   4.679 +		blkptr_t blk;
   4.680 +		ddt_phys_t *ddp = dde.dde_phys;
   4.681 +		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
   4.682 +		for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
   4.683 +			if (ddp->ddp_phys_birth == 0)
   4.684 +				continue;
   4.685 +			ddt_bp_create(ddt, &dde.dde_key, ddp, &blk);
   4.686 +			if (p == DDT_PHYS_DITTO) {
   4.687 +				zdb_count_block(ddt->ddt_spa, NULL, zcb, &blk,
   4.688 +				    ZDB_OT_DITTO);
   4.689 +			} else {
   4.690 +				zcb->zcb_dedup_asize +=
   4.691 +				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
   4.692 +				zcb->zcb_dedup_blocks++;
   4.693 +			}
   4.694 +		}
   4.695 +		if (!dump_opt['L']) {
   4.696 +			ddt_enter(ddt);
   4.697 +			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
   4.698 +			ddt_exit(ddt);
   4.699  		}
   4.700  	}
   4.701 +
   4.702 +	ASSERT(error == ENOENT);
   4.703 +}
   4.704 +
   4.705 +static void
   4.706 +zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
   4.707 +{
   4.708 +	if (!dump_opt['L']) {
   4.709 +		vdev_t *rvd = spa->spa_root_vdev;
   4.710 +		for (int c = 0; c < rvd->vdev_children; c++) {
   4.711 +			vdev_t *vd = rvd->vdev_child[c];
   4.712 +			for (int m = 0; m < vd->vdev_ms_count; m++) {
   4.713 +				metaslab_t *msp = vd->vdev_ms[m];
   4.714 +				mutex_enter(&msp->ms_lock);
   4.715 +				space_map_unload(&msp->ms_map);
   4.716 +				VERIFY(space_map_load(&msp->ms_map,
   4.717 +				    &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
   4.718 +				    spa->spa_meta_objset) == 0);
   4.719 +				msp->ms_map.sm_ppd = vd;
   4.720 +				mutex_exit(&msp->ms_lock);
   4.721 +			}
   4.722 +		}
   4.723 +	}
   4.724 +
   4.725 +	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
   4.726 +
   4.727 +	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
   4.728 +		for (enum ddt_type type = 0; type < DDT_TYPES; type++)
   4.729 +			for (enum ddt_class class = 0; class < DDT_CLASSES;
   4.730 +			    class++)
   4.731 +				zdb_ddt_leak_init(spa->spa_ddt[c],
   4.732 +				    type, class, zcb);
   4.733 +
   4.734 +	spa_config_exit(spa, SCL_CONFIG, FTAG);
   4.735  }
   4.736  
   4.737  static void
   4.738  zdb_leak_fini(spa_t *spa)
   4.739  {
   4.740 -	vdev_t *rvd = spa->spa_root_vdev;
   4.741 -
   4.742 -	for (int c = 0; c < rvd->vdev_children; c++) {
   4.743 -		vdev_t *vd = rvd->vdev_child[c];
   4.744 -		for (int m = 0; m < vd->vdev_ms_count; m++) {
   4.745 -			metaslab_t *msp = vd->vdev_ms[m];
   4.746 -			mutex_enter(&msp->ms_lock);
   4.747 -			space_map_unload(&msp->ms_map);
   4.748 -			mutex_exit(&msp->ms_lock);
   4.749 -		}
   4.750 -	}
   4.751 -}
   4.752 -
   4.753 -/*
   4.754 - * Verify that the sum of the sizes of all blocks in the pool adds up
   4.755 - * to the SPA's sa_alloc total.
   4.756 - */
   4.757 -typedef struct zdb_blkstats {
   4.758 -	uint64_t	zb_asize;
   4.759 -	uint64_t	zb_lsize;
   4.760 -	uint64_t	zb_psize;
   4.761 -	uint64_t	zb_count;
   4.762 -} zdb_blkstats_t;
   4.763 -
   4.764 -#define	DMU_OT_DEFERRED	DMU_OT_NONE
   4.765 -#define	DMU_OT_TOTAL	DMU_OT_NUMTYPES
   4.766 -
   4.767 -#define	ZB_TOTAL	DN_MAX_LEVELS
   4.768 -
   4.769 -typedef struct zdb_cb {
   4.770 -	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
   4.771 -	uint64_t	zcb_errors[256];
   4.772 -	int		zcb_readfails;
   4.773 -	int		zcb_haderrors;
   4.774 -} zdb_cb_t;
   4.775 -
   4.776 -static void
   4.777 -zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
   4.778 -{
   4.779 -	for (int i = 0; i < 4; i++) {
   4.780 -		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
   4.781 -		int t = (i & 1) ? type : DMU_OT_TOTAL;
   4.782 -		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
   4.783 -
   4.784 -		zb->zb_asize += BP_GET_ASIZE(bp);
   4.785 -		zb->zb_lsize += BP_GET_LSIZE(bp);
   4.786 -		zb->zb_psize += BP_GET_PSIZE(bp);
   4.787 -		zb->zb_count++;
   4.788 -	}
   4.789 -
   4.790 -	if (dump_opt['S']) {
   4.791 -		boolean_t print_sig;
   4.792 -
   4.793 -		print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
   4.794 -		    BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
   4.795 -
   4.796 -		if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
   4.797 -			print_sig = B_FALSE;
   4.798 -
   4.799 -		if (print_sig) {
   4.800 -			(void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
   4.801 -			    "%llx:%llx:%llx:%llx\n",
   4.802 -			    (u_longlong_t)BP_GET_LEVEL(bp),
   4.803 -			    (longlong_t)BP_GET_PSIZE(bp),
   4.804 -			    (longlong_t)BP_GET_NDVAS(bp),
   4.805 -			    ZDB_OT_NAME(BP_GET_TYPE(bp)),
   4.806 -			    ZDB_CHECKSUM_NAME(BP_GET_CHECKSUM(bp)),
   4.807 -			    ZDB_COMPRESS_NAME(BP_GET_COMPRESS(bp)),
   4.808 -			    (u_longlong_t)bp->blk_cksum.zc_word[0],
   4.809 -			    (u_longlong_t)bp->blk_cksum.zc_word[1],
   4.810 -			    (u_longlong_t)bp->blk_cksum.zc_word[2],
   4.811 -			    (u_longlong_t)bp->blk_cksum.zc_word[3]);
   4.812 -		}
   4.813 -	}
   4.814 -
   4.815 -	if (!dump_opt['L'])
   4.816 -		VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
   4.817 -		    NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
   4.818 -}
   4.819 -
   4.820 -static int
   4.821 -zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
   4.822 -    const dnode_phys_t *dnp, void *arg)
   4.823 -{
   4.824 -	zdb_cb_t *zcb = arg;
   4.825 -	char blkbuf[BP_SPRINTF_LEN];
   4.826 -	dmu_object_type_t type;
   4.827 -	boolean_t is_metadata;
   4.828 -
   4.829 -	if (bp == NULL)
   4.830 -		return (0);
   4.831 -
   4.832 -	type = BP_GET_TYPE(bp);
   4.833 -
   4.834 -	zdb_count_block(spa, zcb, bp, type);
   4.835 -
   4.836 -	is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
   4.837 -
   4.838 -	if (dump_opt['c'] > 1 || dump_opt['S'] ||
   4.839 -	    (dump_opt['c'] && is_metadata)) {
   4.840 -		size_t size = BP_GET_PSIZE(bp);
   4.841 -		void *data = malloc(size);
   4.842 -		int ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
   4.843 -		    NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
   4.844 -		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
   4.845 -		free(data);
   4.846 -
   4.847 -		/* We expect io errors on intent log */
   4.848 -		if (ioerr && type != DMU_OT_INTENT_LOG) {
   4.849 -			zcb->zcb_haderrors = 1;
   4.850 -			zcb->zcb_errors[ioerr]++;
   4.851 -
   4.852 -			if (dump_opt['b'] >= 2)
   4.853 -				sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
   4.854 -			else
   4.855 -				blkbuf[0] = '\0';
   4.856 -
   4.857 -			if (!dump_opt['S']) {
   4.858 -				(void) printf("zdb_blkptr_cb: "
   4.859 -				    "Got error %d reading "
   4.860 -				    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
   4.861 -				    ioerr,
   4.862 -				    (u_longlong_t)zb->zb_objset,
   4.863 -				    (u_longlong_t)zb->zb_object,
   4.864 -				    (u_longlong_t)zb->zb_level,
   4.865 -				    (u_longlong_t)zb->zb_blkid,
   4.866 -				    blkbuf);
   4.867 +	if (!dump_opt['L']) {
   4.868 +		vdev_t *rvd = spa->spa_root_vdev;
   4.869 +		for (int c = 0; c < rvd->vdev_children; c++) {
   4.870 +			vdev_t *vd = rvd->vdev_child[c];
   4.871 +			for (int m = 0; m < vd->vdev_ms_count; m++) {
   4.872 +				metaslab_t *msp = vd->vdev_ms[m];
   4.873 +				mutex_enter(&msp->ms_lock);
   4.874 +				space_map_unload(&msp->ms_map);
   4.875 +				mutex_exit(&msp->ms_lock);
   4.876  			}
   4.877  		}
   4.878  	}
   4.879 -
   4.880 -	zcb->zcb_readfails = 0;
   4.881 -
   4.882 -	if (dump_opt['b'] >= 4) {
   4.883 -		sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
   4.884 -		(void) printf("objset %llu object %llu offset 0x%llx %s\n",
   4.885 -		    (u_longlong_t)zb->zb_objset,
   4.886 -		    (u_longlong_t)zb->zb_object,
   4.887 -		    (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
   4.888 -		    blkbuf);
   4.889 -	}
   4.890 -
   4.891 -	return (0);
   4.892  }
   4.893  
   4.894  static int
   4.895 @@ -1682,19 +1956,15 @@
   4.896  {
   4.897  	zdb_cb_t zcb = { 0 };
   4.898  	zdb_blkstats_t *zb, *tzb;
   4.899 -	uint64_t alloc, space, logalloc;
   4.900 -	vdev_t *rvd = spa->spa_root_vdev;
   4.901 +	uint64_t norm_alloc, norm_space, total_alloc, total_found;
   4.902  	int leaks = 0;
   4.903 -	int c, e;
   4.904  
   4.905 -	if (!dump_opt['S']) {
   4.906 -		(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
   4.907 -		    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
   4.908 -		    (dump_opt['c'] == 1) ? "metadata " : "",
   4.909 -		    dump_opt['c'] ? "checksums " : "",
   4.910 -		    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
   4.911 -		    !dump_opt['L'] ? "nothing leaked " : "");
   4.912 -	}
   4.913 +	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
   4.914 +	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
   4.915 +	    (dump_opt['c'] == 1) ? "metadata " : "",
   4.916 +	    dump_opt['c'] ? "checksums " : "",
   4.917 +	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
   4.918 +	    !dump_opt['L'] ? "nothing leaked " : "");
   4.919  
   4.920  	/*
   4.921  	 * Load all space maps as SM_ALLOC maps, then traverse the pool
   4.922 @@ -1704,28 +1974,27 @@
   4.923  	 * it's not part of any space map) is a double allocation,
   4.924  	 * reference to a freed block, or an unclaimed log block.
   4.925  	 */
   4.926 -	if (!dump_opt['L'])
   4.927 -		zdb_leak_init(spa);
   4.928 +	zdb_leak_init(spa, &zcb);
   4.929  
   4.930  	/*
   4.931  	 * If there's a deferred-free bplist, process that first.
   4.932  	 */
   4.933 -	if (spa->spa_sync_bplist_obj != 0) {
   4.934 -		bplist_t *bpl = &spa->spa_sync_bplist;
   4.935 +	if (spa->spa_deferred_bplist_obj != 0) {
   4.936 +		bplist_t *bpl = &spa->spa_deferred_bplist;
   4.937  		blkptr_t blk;
   4.938  		uint64_t itor = 0;
   4.939  
   4.940  		VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
   4.941 -		    spa->spa_sync_bplist_obj));
   4.942 +		    spa->spa_deferred_bplist_obj));
   4.943  
   4.944  		while (bplist_iterate(bpl, &itor, &blk) == 0) {
   4.945  			if (dump_opt['b'] >= 4) {
   4.946  				char blkbuf[BP_SPRINTF_LEN];
   4.947 -				sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
   4.948 +				sprintf_blkptr(blkbuf, &blk);
   4.949  				(void) printf("[%s] %s\n",
   4.950  				    "deferred free", blkbuf);
   4.951  			}
   4.952 -			zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
   4.953 +			zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
   4.954  		}
   4.955  
   4.956  		bplist_close(bpl);
   4.957 @@ -1733,10 +2002,10 @@
   4.958  
   4.959  	zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb, 0);
   4.960  
   4.961 -	if (zcb.zcb_haderrors && !dump_opt['S']) {
   4.962 +	if (zcb.zcb_haderrors) {
   4.963  		(void) printf("\nError counts:\n\n");
   4.964  		(void) printf("\t%5s  %s\n", "errno", "count");
   4.965 -		for (e = 0; e < 256; e++) {
   4.966 +		for (int e = 0; e < 256; e++) {
   4.967  			if (zcb.zcb_errors[e] != 0) {
   4.968  				(void) printf("\t%5d  %llu\n",
   4.969  				    e, (u_longlong_t)zcb.zcb_errors[e]);
   4.970 @@ -1747,43 +2016,27 @@
   4.971  	/*
   4.972  	 * Report any leaked segments.
   4.973  	 */
   4.974 -	if (!dump_opt['L'])
   4.975 -		zdb_leak_fini(spa);
   4.976 +	zdb_leak_fini(spa);
   4.977  
   4.978 -	/*
   4.979 -	 * If we're interested in printing out the blkptr signatures,
   4.980 -	 * return now as we don't print out anything else (including
   4.981 -	 * errors and leaks).
   4.982 -	 */
   4.983 -	if (dump_opt['S'])
   4.984 -		return (zcb.zcb_haderrors ? 3 : 0);
   4.985 +	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
   4.986  
   4.987 -	alloc = spa_get_alloc(spa);
   4.988 -	space = spa_get_space(spa);
   4.989 +	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
   4.990 +	norm_space = metaslab_class_get_space(spa_normal_class(spa));
   4.991  
   4.992 -	/*
   4.993 -	 * Log blocks allocated from a separate log device don't count
   4.994 -	 * as part of the normal pool space; factor them in here.
   4.995 -	 */
   4.996 -	logalloc = 0;
   4.997 +	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
   4.998 +	total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
   4.999  
  4.1000 -	for (c = 0; c < rvd->vdev_children; c++)
  4.1001 -		if (rvd->vdev_child[c]->vdev_islog)
  4.1002 -			logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
  4.1003 -
  4.1004 -	tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
  4.1005 -
  4.1006 -	if (tzb->zb_asize == alloc + logalloc) {
  4.1007 +	if (total_found == total_alloc) {
  4.1008  		if (!dump_opt['L'])
  4.1009  			(void) printf("\n\tNo leaks (block sum matches space"
  4.1010  			    " maps exactly)\n");
  4.1011  	} else {
  4.1012  		(void) printf("block traversal size %llu != alloc %llu "
  4.1013  		    "(%s %lld)\n",
  4.1014 -		    (u_longlong_t)tzb->zb_asize,
  4.1015 -		    (u_longlong_t)alloc + logalloc,
  4.1016 +		    (u_longlong_t)total_found,
  4.1017 +		    (u_longlong_t)total_alloc,
  4.1018  		    (dump_opt['L']) ? "unreachable" : "leaked",
  4.1019 -		    (longlong_t)(alloc + logalloc - tzb->zb_asize));
  4.1020 +		    (longlong_t)(total_alloc - total_found));
  4.1021  		leaks = 1;
  4.1022  	}
  4.1023  
  4.1024 @@ -1793,33 +2046,40 @@
  4.1025  	(void) printf("\n");
  4.1026  	(void) printf("\tbp count:      %10llu\n",
  4.1027  	    (u_longlong_t)tzb->zb_count);
  4.1028 -	(void) printf("\tbp logical:    %10llu\t avg: %6llu\n",
  4.1029 +	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
  4.1030  	    (u_longlong_t)tzb->zb_lsize,
  4.1031  	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
  4.1032 -	(void) printf("\tbp physical:   %10llu\t avg:"
  4.1033 -	    " %6llu\tcompression: %6.2f\n",
  4.1034 +	(void) printf("\tbp physical:   %10llu      avg:"
  4.1035 +	    " %6llu     compression: %6.2f\n",
  4.1036  	    (u_longlong_t)tzb->zb_psize,
  4.1037  	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
  4.1038  	    (double)tzb->zb_lsize / tzb->zb_psize);
  4.1039 -	(void) printf("\tbp allocated:  %10llu\t avg:"
  4.1040 -	    " %6llu\tcompression: %6.2f\n",
  4.1041 +	(void) printf("\tbp allocated:  %10llu      avg:"
  4.1042 +	    " %6llu     compression: %6.2f\n",
  4.1043  	    (u_longlong_t)tzb->zb_asize,
  4.1044  	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
  4.1045  	    (double)tzb->zb_lsize / tzb->zb_asize);
  4.1046 -	(void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
  4.1047 -	    (u_longlong_t)alloc, 100.0 * alloc / space);
  4.1048 +	(void) printf("\tbp deduped:    %10llu    ref>1:"
  4.1049 +	    " %6llu   deduplication: %6.2f\n",
  4.1050 +	    (u_longlong_t)zcb.zcb_dedup_asize,
  4.1051 +	    (u_longlong_t)zcb.zcb_dedup_blocks,
  4.1052 +	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
  4.1053 +	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
  4.1054 +	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
  4.1055  
  4.1056  	if (dump_opt['b'] >= 2) {
  4.1057  		int l, t, level;
  4.1058  		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
  4.1059  		    "\t  avg\t comp\t%%Total\tType\n");
  4.1060  
  4.1061 -		for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
  4.1062 +		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
  4.1063  			char csize[6], lsize[6], psize[6], asize[6], avg[6];
  4.1064  			char *typename;
  4.1065  
  4.1066 -			typename = t == DMU_OT_DEFERRED ? "deferred free" :
  4.1067 -			    t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
  4.1068 +			if (t < DMU_OT_NUMTYPES)
  4.1069 +				typename = dmu_ot[t].ot_name;
  4.1070 +			else
  4.1071 +				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
  4.1072  
  4.1073  			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
  4.1074  				(void) printf("%6s\t%5s\t%5s\t%5s"
  4.1075 @@ -1881,11 +2141,115 @@
  4.1076  	return (0);
  4.1077  }
  4.1078  
  4.1079 +typedef struct zdb_ddt_entry {
  4.1080 +	ddt_key_t	zdde_key;
  4.1081 +	uint64_t	zdde_ref_blocks;
  4.1082 +	uint64_t	zdde_ref_lsize;
  4.1083 +	uint64_t	zdde_ref_psize;
  4.1084 +	uint64_t	zdde_ref_dsize;
  4.1085 +	avl_node_t	zdde_node;
  4.1086 +} zdb_ddt_entry_t;
  4.1087 +
  4.1088 +/* ARGSUSED */
  4.1089 +static int
  4.1090 +zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
  4.1091 +    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
  4.1092 +{
  4.1093 +	avl_tree_t *t = arg;
  4.1094 +	avl_index_t where;
  4.1095 +	zdb_ddt_entry_t *zdde, zdde_search;
  4.1096 +
  4.1097 +	if (bp == NULL)
  4.1098 +		return (0);
  4.1099 +
  4.1100 +	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
  4.1101 +		(void) printf("traversing objset %llu, %llu objects, "
  4.1102 +		    "%lu blocks so far\n",
  4.1103 +		    (u_longlong_t)zb->zb_objset,
  4.1104 +		    (u_longlong_t)bp->blk_fill,
  4.1105 +		    avl_numnodes(t));
  4.1106 +	}
  4.1107 +
  4.1108 +	if (BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
  4.1109 +		return (0);
  4.1110 +
  4.1111 +	ddt_key_fill(&zdde_search.zdde_key, bp);
  4.1112 +
  4.1113 +	zdde = avl_find(t, &zdde_search, &where);
  4.1114 +
  4.1115 +	if (zdde == NULL) {
  4.1116 +		zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
  4.1117 +		zdde->zdde_key = zdde_search.zdde_key;
  4.1118 +		avl_insert(t, zdde, where);
  4.1119 +	}
  4.1120 +
  4.1121 +	zdde->zdde_ref_blocks += 1;
  4.1122 +	zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
  4.1123 +	zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
  4.1124 +	zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
  4.1125 +
  4.1126 +	return (0);
  4.1127 +}
  4.1128 +
  4.1129 +static void
  4.1130 +dump_simulated_ddt(spa_t *spa)
  4.1131 +{
  4.1132 +	avl_tree_t t;
  4.1133 +	void *cookie = NULL;
  4.1134 +	zdb_ddt_entry_t *zdde;
  4.1135 +	ddt_histogram_t ddh_total = { 0 };
  4.1136 +	ddt_stat_t dds_total = { 0 };
  4.1137 +
  4.1138 +	avl_create(&t, ddt_entry_compare,
  4.1139 +	    sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
  4.1140 +
  4.1141 +	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
  4.1142 +
  4.1143 +	(void) traverse_pool(spa, zdb_ddt_add_cb, &t, 0);
  4.1144 +
  4.1145 +	spa_config_exit(spa, SCL_CONFIG, FTAG);
  4.1146 +
  4.1147 +	while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
  4.1148 +		ddt_stat_t dds;
  4.1149 +		uint64_t refcnt = zdde->zdde_ref_blocks;
  4.1150 +		ASSERT(refcnt != 0);
  4.1151 +
  4.1152 +		dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
  4.1153 +		dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
  4.1154 +		dds.dds_psize = zdde->zdde_ref_psize / refcnt;
  4.1155 +		dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
  4.1156 +
  4.1157 +		dds.dds_ref_blocks = zdde->zdde_ref_blocks;
  4.1158 +		dds.dds_ref_lsize = zdde->zdde_ref_lsize;
  4.1159 +		dds.dds_ref_psize = zdde->zdde_ref_psize;
  4.1160 +		dds.dds_ref_dsize = zdde->zdde_ref_dsize;
  4.1161 +
  4.1162 +		ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
  4.1163 +
  4.1164 +		umem_free(zdde, sizeof (*zdde));
  4.1165 +	}
  4.1166 +
  4.1167 +	avl_destroy(&t);
  4.1168 +
  4.1169 +	ddt_histogram_stat(&dds_total, &ddh_total);
  4.1170 +
  4.1171 +	(void) printf("Simulated DDT histogram:\n");
  4.1172 +
  4.1173 +	dump_ddt_histogram(&ddh_total);
  4.1174 +
  4.1175 +	dump_dedup_ratio(&dds_total);
  4.1176 +}
  4.1177 +
  4.1178  static void
  4.1179  dump_zpool(spa_t *spa)
  4.1180  {
  4.1181  	dsl_pool_t *dp = spa_get_dsl(spa);
  4.1182  	int rc = 0;
  4.1183 +
  4.1184 +	if (dump_opt['S']) {
  4.1185 +		dump_simulated_ddt(spa);
  4.1186 +		return;
  4.1187 +	}
  4.1188  
  4.1189  	if (!dump_opt['e'] && dump_opt['C'] > 1) {
  4.1190  		(void) printf("\nCached configuration:\n");
  4.1191 @@ -1898,6 +2262,9 @@
  4.1192  	if (dump_opt['u'])
  4.1193  		dump_uberblock(&spa->spa_uberblock);
  4.1194  
  4.1195 +	if (dump_opt['D'])
  4.1196 +		dump_all_ddts(spa);
  4.1197 +
  4.1198  	if (dump_opt['d'] > 2 || dump_opt['m'])
  4.1199  		dump_metaslabs(spa);
  4.1200  
  4.1201 @@ -1905,13 +2272,13 @@
  4.1202  		dump_dir(dp->dp_meta_objset);
  4.1203  		if (dump_opt['d'] >= 3) {
  4.1204  			dump_bplist(dp->dp_meta_objset,
  4.1205 -			    spa->spa_sync_bplist_obj, "Deferred frees");
  4.1206 +			    spa->spa_deferred_bplist_obj, "Deferred frees");
  4.1207  			dump_dtl(spa->spa_root_vdev, 0);
  4.1208  		}
  4.1209  		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
  4.1210  		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
  4.1211  	}
  4.1212 -	if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
  4.1213 +	if (dump_opt['b'] || dump_opt['c'])
  4.1214  		rc = dump_block_stats(spa);
  4.1215  
  4.1216  	if (dump_opt['s'])
  4.1217 @@ -1938,51 +2305,13 @@
  4.1218  static void
  4.1219  zdb_print_blkptr(blkptr_t *bp, int flags)
  4.1220  {
  4.1221 -	dva_t *dva = bp->blk_dva;
  4.1222 -	int d;
  4.1223 +	char blkbuf[BP_SPRINTF_LEN];
  4.1224  
  4.1225  	if (flags & ZDB_FLAG_BSWAP)
  4.1226  		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
  4.1227 -	/*
  4.1228 -	 * Super-ick warning:  This code is also duplicated in
  4.1229 -	 * cmd/mdb/common/modules/zfs/zfs.c .  Yeah, I hate code
  4.1230 -	 * replication, too.
  4.1231 -	 */
  4.1232 -	for (d = 0; d < BP_GET_NDVAS(bp); d++) {
  4.1233 -		(void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
  4.1234 -		    (longlong_t)DVA_GET_VDEV(&dva[d]),
  4.1235 -		    (longlong_t)DVA_GET_OFFSET(&dva[d]));
  4.1236 -		(void) printf("\tDVA[%d]:       GANG: %-5s  GRID:  %04llx\t"
  4.1237 -		    "ASIZE: %llx\n", d,
  4.1238 -		    DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
  4.1239 -		    (longlong_t)DVA_GET_GRID(&dva[d]),
  4.1240 -		    (longlong_t)DVA_GET_ASIZE(&dva[d]));
  4.1241 -		(void) printf("\tDVA[%d]: %llu:%llx:%llx:%s%s%s%s\n", d,
  4.1242 -		    (u_longlong_t)DVA_GET_VDEV(&dva[d]),
  4.1243 -		    (longlong_t)DVA_GET_OFFSET(&dva[d]),
  4.1244 -		    (longlong_t)BP_GET_PSIZE(bp),
  4.1245 -		    BP_SHOULD_BYTESWAP(bp) ? "e" : "",
  4.1246 -		    !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
  4.1247 -		    "d" : "",
  4.1248 -		    DVA_GET_GANG(&dva[d]) ? "g" : "",
  4.1249 -		    BP_GET_COMPRESS(bp) != 0 ? "d" : "");
  4.1250 -	}
  4.1251 -	(void) printf("\tLSIZE:  %-16llx\t\tPSIZE: %llx\n",
  4.1252 -	    (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
  4.1253 -	(void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
  4.1254 -	    BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
  4.1255 -	    ZDB_OT_NAME(BP_GET_TYPE(bp)));
  4.1256 -	(void) printf("\tBIRTH:  %-16llx   LEVEL: %-2llu\tFILL:  %llx\n",
  4.1257 -	    (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
  4.1258 -	    (u_longlong_t)bp->blk_fill);
  4.1259 -	(void) printf("\tCKFUNC: %-16s\t\tCOMP:  %s\n",
  4.1260 -	    ZDB_CHECKSUM_NAME(BP_GET_CHECKSUM(bp)),
  4.1261 -	    ZDB_COMPRESS_NAME(BP_GET_COMPRESS(bp)));
  4.1262 -	(void) printf("\tCKSUM:  %llx:%llx:%llx:%llx\n",
  4.1263 -	    (u_longlong_t)bp->blk_cksum.zc_word[0],
  4.1264 -	    (u_longlong_t)bp->blk_cksum.zc_word[1],
  4.1265 -	    (u_longlong_t)bp->blk_cksum.zc_word[2],
  4.1266 -	    (u_longlong_t)bp->blk_cksum.zc_word[3]);
  4.1267 +
  4.1268 +	sprintf_blkptr(blkbuf, bp);
  4.1269 +	(void) printf("%s\n", blkbuf);
  4.1270  }
  4.1271  
  4.1272  static void
  4.1273 @@ -2005,7 +2334,7 @@
  4.1274  {
  4.1275  	if (flags & ZDB_FLAG_BSWAP)
  4.1276  		byteswap_uint64_array(buf, size);
  4.1277 -	(void) write(2, buf, size);
  4.1278 +	(void) write(1, buf, size);
  4.1279  }
  4.1280  
  4.1281  static void
  4.1282 @@ -2108,10 +2437,10 @@
  4.1283   *	flags          - A string of characters specifying options
  4.1284   *		 b: Decode a blkptr at given offset within block
  4.1285   *		*c: Calculate and display checksums
  4.1286 - *		*d: Decompress data before dumping
  4.1287 + *		 d: Decompress data before dumping
  4.1288   *		 e: Byteswap data before dumping
  4.1289 - *		*g: Display data as a gang block header
  4.1290 - *		*i: Display as an indirect block
  4.1291 + *		 g: Display data as a gang block header
  4.1292 + *		 i: Display as an indirect block
  4.1293   *		 p: Do I/O to physical offset
  4.1294   *		 r: Dump raw data to stdout
  4.1295   *
  4.1296 @@ -2120,13 +2449,15 @@
  4.1297  static void
  4.1298  zdb_read_block(char *thing, spa_t *spa)
  4.1299  {
  4.1300 +	blkptr_t blk, *bp = &blk;
  4.1301 +	dva_t *dva = bp->blk_dva;
  4.1302  	int flags = 0;
  4.1303 -	uint64_t offset = 0, size = 0, blkptr_offset = 0;
  4.1304 +	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
  4.1305  	zio_t *zio;
  4.1306  	vdev_t *vd;
  4.1307 -	void *buf;
  4.1308 +	void *pbuf, *lbuf, *buf;
  4.1309  	char *s, *p, *dup, *vdev, *flagstr;
  4.1310 -	int i, error, zio_flags;
  4.1311 +	int i, error;
  4.1312  
  4.1313  	dup = strdup(thing);
  4.1314  	s = strtok(dup, ":");
  4.1315 @@ -2163,7 +2494,7 @@
  4.1316  			flags |= bit;
  4.1317  
  4.1318  			/* If it's not something with an argument, keep going */
  4.1319 -			if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
  4.1320 +			if ((bit & (ZDB_FLAG_CHECKSUM |
  4.1321  			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
  4.1322  				continue;
  4.1323  
  4.1324 @@ -2185,28 +2516,110 @@
  4.1325  		return;
  4.1326  	} else {
  4.1327  		if (vd->vdev_path)
  4.1328 -			(void) printf("Found vdev: %s\n", vd->vdev_path);
  4.1329 +			(void) fprintf(stderr, "Found vdev: %s\n",
  4.1330 +			    vd->vdev_path);
  4.1331  		else
  4.1332 -			(void) printf("Found vdev type: %s\n",
  4.1333 +			(void) fprintf(stderr, "Found vdev type: %s\n",
  4.1334  			    vd->vdev_ops->vdev_op_type);
  4.1335  	}
  4.1336  
  4.1337 -	buf = umem_alloc(size, UMEM_NOFAIL);
  4.1338 +	psize = size;
  4.1339 +	lsize = size;
  4.1340  
  4.1341 -	zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
  4.1342 -	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
  4.1343 +	pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
  4.1344 +	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
  4.1345 +
  4.1346 +	BP_ZERO(bp);
  4.1347 +
  4.1348 +	DVA_SET_VDEV(&dva[0], vd->vdev_id);
  4.1349 +	DVA_SET_OFFSET(&dva[0], offset);
  4.1350 +	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
  4.1351 +	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
  4.1352 +
  4.1353 +	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
  4.1354 +
  4.1355 +	BP_SET_LSIZE(bp, lsize);
  4.1356 +	BP_SET_PSIZE(bp, psize);
  4.1357 +	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
  4.1358 +	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
  4.1359 +	BP_SET_TYPE(bp, DMU_OT_NONE);
  4.1360 +	BP_SET_LEVEL(bp, 0);
  4.1361 +	BP_SET_DEDUP(bp, 0);
  4.1362 +	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
  4.1363  
  4.1364  	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
  4.1365  	zio = zio_root(spa, NULL, NULL, 0);
  4.1366 -	/* XXX todo - cons up a BP so RAID-Z will be happy */
  4.1367 -	zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
  4.1368 -	    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
  4.1369 +
  4.1370 +	if (vd == vd->vdev_top) {
  4.1371 +		/*
  4.1372 +		 * Treat this as a normal block read.
  4.1373 +		 */
  4.1374 +		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
  4.1375 +		    ZIO_PRIORITY_SYNC_READ,
  4.1376 +		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
  4.1377 +	} else {
  4.1378 +		/*
  4.1379 +		 * Treat this as a vdev child I/O.
  4.1380 +		 */
  4.1381 +		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
  4.1382 +		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
  4.1383 +		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
  4.1384 +		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
  4.1385 +		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
  4.1386 +	}
  4.1387 +
  4.1388  	error = zio_wait(zio);
  4.1389  	spa_config_exit(spa, SCL_STATE, FTAG);
  4.1390  
  4.1391  	if (error) {
  4.1392  		(void) printf("Read of %s failed, error: %d\n", thing, error);
  4.1393  		goto out;
  4.1394 +	}
  4.1395 +
  4.1396 +	if (flags & ZDB_FLAG_DECOMPRESS) {
  4.1397 +		/*
  4.1398 +		 * We don't know how the data was compressed, so just try
  4.1399 +		 * every decompress function at every inflated blocksize.
  4.1400 +		 */
  4.1401 +		enum zio_compress c;
  4.1402 +		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
  4.1403 +		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
  4.1404 +
  4.1405 +		bcopy(pbuf, pbuf2, psize);
  4.1406 +
  4.1407 +		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
  4.1408 +		    SPA_MAXBLOCKSIZE - psize) == 0);
  4.1409 +
  4.1410 +		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
  4.1411 +		    SPA_MAXBLOCKSIZE - psize) == 0);
  4.1412 +
  4.1413 +		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
  4.1414 +		    lsize -= SPA_MINBLOCKSIZE) {
  4.1415 +			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
  4.1416 +				if (zio_decompress_data(c, pbuf, lbuf,
  4.1417 +				    psize, lsize) == 0 &&
  4.1418 +				    zio_decompress_data(c, pbuf2, lbuf2,
  4.1419 +				    psize, lsize) == 0 &&
  4.1420 +				    bcmp(lbuf, lbuf2, lsize) == 0)
  4.1421 +					break;
  4.1422 +			}
  4.1423 +			if (c != ZIO_COMPRESS_FUNCTIONS)
  4.1424 +				break;
  4.1425 +			lsize -= SPA_MINBLOCKSIZE;
  4.1426 +		}
  4.1427 +
  4.1428 +		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
  4.1429 +		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
  4.1430 +
  4.1431 +		if (lsize <= psize) {
  4.1432 +			(void) printf("Decompress of %s failed\n", thing);
  4.1433 +			goto out;
  4.1434 +		}
  4.1435 +		buf = lbuf;
  4.1436 +		size = lsize;
  4.1437 +	} else {
  4.1438 +		buf = pbuf;
  4.1439 +		size = psize;
  4.1440  	}
  4.1441  
  4.1442  	if (flags & ZDB_FLAG_PRINT_BLKPTR)
  4.1443 @@ -2223,7 +2636,8 @@
  4.1444  		zdb_dump_block(thing, buf, size, flags);
  4.1445  
  4.1446  out:
  4.1447 -	umem_free(buf, size);
  4.1448 +	umem_free(pbuf, SPA_MAXBLOCKSIZE);
  4.1449 +	umem_free(lbuf, SPA_MAXBLOCKSIZE);
  4.1450  	free(dup);
  4.1451  }
  4.1452  
  4.1453 @@ -2312,7 +2726,6 @@
  4.1454  	struct rlimit rl = { 1024, 1024 };
  4.1455  	spa_t *spa = NULL;
  4.1456  	objset_t *os = NULL;
  4.1457 -	char *endstr;
  4.1458  	int dump_all = 1;
  4.1459  	int verbose = 0;
  4.1460  	int error;
  4.1461 @@ -2327,19 +2740,21 @@
  4.1462  
  4.1463  	dprintf_setup(&argc, argv);
  4.1464  
  4.1465 -	while ((c = getopt(argc, argv, "udhibcmsvCLS:RU:lep:t:")) != -1) {
  4.1466 +	while ((c = getopt(argc, argv, "bcdhilmsuCDRSLevp:t:U:")) != -1) {
  4.1467  		switch (c) {
  4.1468 -		case 'u':
  4.1469 -		case 'd':
  4.1470 -		case 'i':
  4.1471 -		case 'h':
  4.1472  		case 'b':
  4.1473  		case 'c':
  4.1474 +		case 'd':
  4.1475 +		case 'h':
  4.1476 +		case 'i':
  4.1477 +		case 'l':
  4.1478  		case 'm':
  4.1479  		case 's':
  4.1480 +		case 'u':
  4.1481  		case 'C':
  4.1482 -		case 'l':
  4.1483 +		case 'D':
  4.1484  		case 'R':
  4.1485 +		case 'S':
  4.1486  			dump_opt[c]++;
  4.1487  			dump_all = 0;
  4.1488  			break;
  4.1489 @@ -2349,9 +2764,6 @@
  4.1490  			break;
  4.1491  		case 'v':
  4.1492  			verbose++;
  4.1493 -			break;
  4.1494 -		case 'U':
  4.1495 -			spa_config_path = optarg;
  4.1496  			break;
  4.1497  		case 'p':
  4.1498  			if (searchdirs == NULL) {
  4.1499 @@ -2368,24 +2780,6 @@
  4.1500  			}
  4.1501  			searchdirs[nsearch++] = optarg;
  4.1502  			break;
  4.1503 -		case 'S':
  4.1504 -			dump_opt[c]++;
  4.1505 -			dump_all = 0;
  4.1506 -			zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
  4.1507 -			if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
  4.1508 -				usage();
  4.1509 -			endstr = strchr(optarg, ':') + 1;
  4.1510 -			if (strcmp(endstr, "fletcher2") == 0)
  4.1511 -				zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
  4.1512 -			else if (strcmp(endstr, "fletcher4") == 0)
  4.1513 -				zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
  4.1514 -			else if (strcmp(endstr, "sha256") == 0)
  4.1515 -				zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
  4.1516 -			else if (strcmp(endstr, "all") == 0)
  4.1517 -				zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
  4.1518 -			else
  4.1519 -				usage();
  4.1520 -			break;
  4.1521  		case 't':
  4.1522  			max_txg = strtoull(optarg, NULL, 0);
  4.1523  			if (max_txg < TXG_INITIAL) {
  4.1524 @@ -2393,6 +2787,9 @@
  4.1525  				    "specified: %s\n", optarg);
  4.1526  				usage();
  4.1527  			}
  4.1528 +			break;
  4.1529 +		case 'U':
  4.1530 +			spa_config_path = optarg;
  4.1531  			break;
  4.1532  		default:
  4.1533  			usage();
  4.1534 @@ -2409,8 +2806,11 @@
  4.1535  	g_zfs = libzfs_init();
  4.1536  	ASSERT(g_zfs != NULL);
  4.1537  
  4.1538 +	if (dump_all)
  4.1539 +		verbose = MAX(verbose, 1);
  4.1540 +
  4.1541  	for (c = 0; c < 256; c++) {
  4.1542 -		if (dump_all && !strchr("elLR", c))
  4.1543 +		if (dump_all && !strchr("elLRS", c))
  4.1544  			dump_opt[c] = 1;
  4.1545  		if (dump_opt[c])
  4.1546  			dump_opt[c] += verbose;
     5.1 --- a/usr/src/cmd/zdb/zdb_il.c	Fri Oct 30 18:47:17 2009 -0600
     5.2 +++ b/usr/src/cmd/zdb/zdb_il.c	Sun Nov 01 14:14:46 2009 -0800
     5.3 @@ -40,12 +40,14 @@
     5.4  
     5.5  extern uint8_t dump_opt[256];
     5.6  
     5.7 +static char prefix[4] = "\t\t\t";
     5.8 +
     5.9  static void
    5.10  print_log_bp(const blkptr_t *bp, const char *prefix)
    5.11  {
    5.12  	char blkbuf[BP_SPRINTF_LEN];
    5.13  
    5.14 -	sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
    5.15 +	sprintf_blkptr(blkbuf, bp);
    5.16  	(void) printf("%s%s\n", prefix, blkbuf);
    5.17  }
    5.18  
    5.19 @@ -58,15 +60,15 @@
    5.20  	char *link = name + strlen(name) + 1;
    5.21  
    5.22  	if (txtype == TX_SYMLINK)
    5.23 -		(void) printf("\t\t\t%s -> %s\n", name, link);
    5.24 +		(void) printf("%s%s -> %s\n", prefix, name, link);
    5.25  	else
    5.26 -		(void) printf("\t\t\t%s\n", name);
    5.27 +		(void) printf("%s%s\n", prefix, name);
    5.28  
    5.29 -	(void) printf("\t\t\t%s", ctime(&crtime));
    5.30 -	(void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
    5.31 +	(void) printf("%s%s", prefix, ctime(&crtime));
    5.32 +	(void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
    5.33  	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
    5.34  	    (longlong_t)lr->lr_mode);
    5.35 -	(void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
    5.36 +	(void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
    5.37  	    (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
    5.38  	    (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
    5.39  }
    5.40 @@ -75,7 +77,7 @@
    5.41  static void
    5.42  zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
    5.43  {
    5.44 -	(void) printf("\t\t\tdoid %llu, name %s\n",
    5.45 +	(void) printf("%sdoid %llu, name %s\n", prefix,
    5.46  	    (u_longlong_t)lr->lr_doid, (char *)(lr + 1));
    5.47  }
    5.48  
    5.49 @@ -83,7 +85,7 @@
    5.50  static void
    5.51  zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
    5.52  {
    5.53 -	(void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
    5.54 +	(void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
    5.55  	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
    5.56  	    (char *)(lr + 1));
    5.57  }
    5.58 @@ -95,9 +97,9 @@
    5.59  	char *snm = (char *)(lr + 1);
    5.60  	char *tnm = snm + strlen(snm) + 1;
    5.61  
    5.62 -	(void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
    5.63 +	(void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
    5.64  	    (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
    5.65 -	(void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
    5.66 +	(void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
    5.67  }
    5.68  
    5.69  /* ARGSUSED */
    5.70 @@ -106,44 +108,48 @@
    5.71  {
    5.72  	char *data, *dlimit;
    5.73  	blkptr_t *bp = &lr->lr_blkptr;
    5.74 +	zbookmark_t zb;
    5.75  	char buf[SPA_MAXBLOCKSIZE];
    5.76  	int verbose = MAX(dump_opt['d'], dump_opt['i']);
    5.77  	int error;
    5.78  
    5.79 -	(void) printf("\t\t\tfoid %llu, offset 0x%llx,"
    5.80 -	    " length 0x%llx, blkoff 0x%llx\n",
    5.81 -	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
    5.82 -	    (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
    5.83 +	(void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
    5.84 +	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
    5.85 +	    (u_longlong_t)lr->lr_length);
    5.86  
    5.87  	if (txtype == TX_WRITE2 || verbose < 5)
    5.88  		return;
    5.89  
    5.90  	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
    5.91 -		(void) printf("\t\t\thas blkptr, %s\n",
    5.92 +		(void) printf("%shas blkptr, %s\n", prefix,
    5.93  		    bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
    5.94  		    "will claim" : "won't claim");
    5.95 -		print_log_bp(bp, "\t\t\t");
    5.96 +		print_log_bp(bp, prefix);
    5.97 +
    5.98  		if (BP_IS_HOLE(bp)) {
    5.99  			(void) printf("\t\t\tLSIZE 0x%llx\n",
   5.100  			    (u_longlong_t)BP_GET_LSIZE(bp));
   5.101  		}
   5.102  		if (bp->blk_birth == 0) {
   5.103  			bzero(buf, sizeof (buf));
   5.104 -		} else {
   5.105 -			zbookmark_t zb;
   5.106 +			(void) printf("%s<hole>\n", prefix);
   5.107 +			return;
   5.108 +		}
   5.109 +		if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
   5.110 +			(void) printf("%s<block already committed>\n", prefix);
   5.111 +			return;
   5.112 +		}
   5.113  
   5.114 -			zb.zb_objset = dmu_objset_id(zilog->zl_os);
   5.115 -			zb.zb_object = lr->lr_foid;
   5.116 -			zb.zb_level = 0;
   5.117 -			zb.zb_blkid = -1; /* unknown */
   5.118 +		SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
   5.119 +		    lr->lr_foid, ZB_ZIL_LEVEL,
   5.120 +		    lr->lr_offset / BP_GET_LSIZE(bp));
   5.121  
   5.122 -			error = zio_wait(zio_read(NULL, zilog->zl_spa,
   5.123 -			    bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
   5.124 -			    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
   5.125 -			if (error)
   5.126 -				return;
   5.127 -		}
   5.128 -		data = buf + lr->lr_blkoff;
   5.129 +		error = zio_wait(zio_read(NULL, zilog->zl_spa,
   5.130 +		    bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
   5.131 +		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
   5.132 +		if (error)
   5.133 +			return;
   5.134 +		data = buf;
   5.135  	} else {
   5.136  		data = (char *)(lr + 1);
   5.137  	}
   5.138 @@ -151,7 +157,7 @@
   5.139  	dlimit = data + MIN(lr->lr_length,
   5.140  	    (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
   5.141  
   5.142 -	(void) printf("\t\t\t");
   5.143 +	(void) printf("%s", prefix);
   5.144  	while (data < dlimit) {
   5.145  		if (isprint(*data))
   5.146  			(void) printf("%c ", *data);
   5.147 @@ -166,7 +172,7 @@
   5.148  static void
   5.149  zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
   5.150  {
   5.151 -	(void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
   5.152 +	(void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
   5.153  	    (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
   5.154  	    (u_longlong_t)lr->lr_length);
   5.155  }
   5.156 @@ -178,38 +184,38 @@
   5.157  	time_t atime = (time_t)lr->lr_atime[0];
   5.158  	time_t mtime = (time_t)lr->lr_mtime[0];
   5.159  
   5.160 -	(void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
   5.161 +	(void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
   5.162  	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
   5.163  
   5.164  	if (lr->lr_mask & AT_MODE) {
   5.165 -		(void) printf("\t\t\tAT_MODE  %llo\n",
   5.166 +		(void) printf("%sAT_MODE  %llo\n", prefix,
   5.167  		    (longlong_t)lr->lr_mode);
   5.168  	}
   5.169  
   5.170  	if (lr->lr_mask & AT_UID) {
   5.171 -		(void) printf("\t\t\tAT_UID   %llu\n",
   5.172 +		(void) printf("%sAT_UID   %llu\n", prefix,
   5.173  		    (u_longlong_t)lr->lr_uid);
   5.174  	}
   5.175  
   5.176  	if (lr->lr_mask & AT_GID) {
   5.177 -		(void) printf("\t\t\tAT_GID   %llu\n",
   5.178 +		(void) printf("%sAT_GID   %llu\n", prefix,
   5.179  		    (u_longlong_t)lr->lr_gid);
   5.180  	}
   5.181  
   5.182  	if (lr->lr_mask & AT_SIZE) {
   5.183 -		(void) printf("\t\t\tAT_SIZE  %llu\n",
   5.184 +		(void) printf("%sAT_SIZE  %llu\n", prefix,
   5.185  		    (u_longlong_t)lr->lr_size);
   5.186  	}
   5.187  
   5.188  	if (lr->lr_mask & AT_ATIME) {
   5.189 -		(void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
   5.190 +		(void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
   5.191  		    (u_longlong_t)lr->lr_atime[0],
   5.192  		    (u_longlong_t)lr->lr_atime[1],
   5.193  		    ctime(&atime));
   5.194  	}
   5.195  
   5.196  	if (lr->lr_mask & AT_MTIME) {
   5.197 -		(void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
   5.198 +		(void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
   5.199  		    (u_longlong_t)lr->lr_mtime[0],
   5.200  		    (u_longlong_t)lr->lr_mtime[1],
   5.201  		    ctime(&mtime));
   5.202 @@ -220,7 +226,7 @@
   5.203  static void
   5.204  zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
   5.205  {
   5.206 -	(void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
   5.207 +	(void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
   5.208  	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
   5.209  }
   5.210  
   5.211 @@ -256,7 +262,7 @@
   5.212  };
   5.213  
   5.214  /* ARGSUSED */
   5.215 -static void
   5.216 +static int
   5.217  print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
   5.218  {
   5.219  	int txtype;
   5.220 @@ -280,23 +286,24 @@
   5.221  
   5.222  	zil_rec_info[txtype].zri_count++;
   5.223  	zil_rec_info[0].zri_count++;
   5.224 +
   5.225 +	return (0);
   5.226  }
   5.227  
   5.228  /* ARGSUSED */
   5.229 -static void
   5.230 +static int
   5.231  print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
   5.232  {
   5.233 -	char blkbuf[BP_SPRINTF_LEN];
   5.234 +	char blkbuf[BP_SPRINTF_LEN + 10];
   5.235  	int verbose = MAX(dump_opt['d'], dump_opt['i']);
   5.236  	char *claim;
   5.237  
   5.238  	if (verbose <= 3)
   5.239 -		return;
   5.240 +		return (0);
   5.241  
   5.242  	if (verbose >= 5) {
   5.243  		(void) strcpy(blkbuf, ", ");
   5.244 -		sprintf_blkptr(blkbuf + strlen(blkbuf),
   5.245 -		    BP_SPRINTF_LEN - strlen(blkbuf), bp);
   5.246 +		sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
   5.247  	} else {
   5.248  		blkbuf[0] = '\0';
   5.249  	}
   5.250 @@ -310,6 +317,8 @@
   5.251  
   5.252  	(void) printf("\tBlock seqno %llu, %s%s\n",
   5.253  	    (u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
   5.254 +
   5.255 +	return (0);
   5.256  }
   5.257  
   5.258  static void
   5.259 @@ -342,16 +351,16 @@
   5.260  	int verbose = MAX(dump_opt['d'], dump_opt['i']);
   5.261  	int i;
   5.262  
   5.263 -	if (zh->zh_log.blk_birth == 0 || verbose < 2)
   5.264 +	if (zh->zh_log.blk_birth == 0 || verbose < 1)
   5.265  		return;
   5.266  
   5.267 -	(void) printf("\n    ZIL header: claim_txg %llu, claim_seq %llu",
   5.268 -	    (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
   5.269 +	(void) printf("\n    ZIL header: claim_txg %llu, "
   5.270 +	    "claim_blk_seq %llu, claim_lr_seq %llu",
   5.271 +	    (u_longlong_t)zh->zh_claim_txg,
   5.272 +	    (u_longlong_t)zh->zh_claim_blk_seq,
   5.273 +	    (u_longlong_t)zh->zh_claim_lr_seq);
   5.274  	(void) printf(" replay_seq %llu, flags 0x%llx\n",
   5.275  	    (u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
   5.276 -
   5.277 -	if (verbose >= 4)
   5.278 -		print_log_bp(&zh->zh_log, "\n\tfirst block: ");
   5.279  
   5.280  	for (i = 0; i < TX_MAX_TYPE; i++)
   5.281  		zil_rec_info[i].zri_count = 0;
     6.1 --- a/usr/src/cmd/zpool/zpool_main.c	Fri Oct 30 18:47:17 2009 -0600
     6.2 +++ b/usr/src/cmd/zpool/zpool_main.c	Sun Nov 01 14:14:46 2009 -0800
     6.3 @@ -250,12 +250,12 @@
     6.4  {
     6.5  	FILE *fp = cb;
     6.6  
     6.7 -	(void) fprintf(fp, "\t%-13s  ", zpool_prop_to_name(prop));
     6.8 +	(void) fprintf(fp, "\t%-15s  ", zpool_prop_to_name(prop));
     6.9  
    6.10  	if (zpool_prop_readonly(prop))
    6.11  		(void) fprintf(fp, "  NO   ");
    6.12  	else
    6.13 -		(void) fprintf(fp, " YES    ");
    6.14 +		(void) fprintf(fp, " YES   ");
    6.15  
    6.16  	if (zpool_prop_values(prop) == NULL)
    6.17  		(void) fprintf(fp, "-\n");
    6.18 @@ -302,7 +302,7 @@
    6.19  		(void) fprintf(fp,
    6.20  		    gettext("\nthe following properties are supported:\n"));
    6.21  
    6.22 -		(void) fprintf(fp, "\n\t%-13s  %s  %s\n\n",
    6.23 +		(void) fprintf(fp, "\n\t%-15s  %s   %s\n\n",
    6.24  		    "PROPERTY", "EDIT", "VALUES");
    6.25  
    6.26  		/* Iterate over all properties */
    6.27 @@ -2449,7 +2449,7 @@
    6.28  	int ret;
    6.29  	list_cbdata_t cb = { 0 };
    6.30  	static char default_props[] =
    6.31 -	    "name,size,used,available,capacity,health,altroot";
    6.32 +	    "name,size,used,available,capacity,dedupratio,health,altroot";
    6.33  	char *props = default_props;
    6.34  
    6.35  	/* check options */
    6.36 @@ -3672,9 +3672,12 @@
    6.37  		(void) printf(gettext(" 15  user/group space accounting\n"));
    6.38  		(void) printf(gettext(" 16  stmf property support\n"));
    6.39  		(void) printf(gettext(" 17  Triple-parity RAID-Z\n"));
    6.40 -		(void) printf(gettext(" 18  snapshot user holds\n"));
    6.41 +		(void) printf(gettext(" 18  Snapshot user holds\n"));
    6.42  		(void) printf(gettext(" 19  Log device removal\n"));
    6.43 -		(void) printf(gettext("For more information on a particular "
    6.44 +		(void) printf(gettext(" 20  Compression using zle "
    6.45 +		    "(zero-length encoding)\n"));
    6.46 +		(void) printf(gettext(" 21  Deduplication\n"));
    6.47 +		(void) printf(gettext("\nFor more information on a particular "
    6.48  		    "version, including supported releases, see:\n\n"));
    6.49  		(void) printf("http://www.opensolaris.org/os/community/zfs/"
    6.50  		    "version/N\n\n");
     7.1 --- a/usr/src/cmd/ztest/ztest.c	Fri Oct 30 18:47:17 2009 -0600
     7.2 +++ b/usr/src/cmd/ztest/ztest.c	Sun Nov 01 14:14:46 2009 -0800
     7.3 @@ -86,9 +86,8 @@
     7.4  #include <sys/mman.h>
     7.5  #include <sys/resource.h>
     7.6  #include <sys/zio.h>
     7.7 -#include <sys/zio_checksum.h>
     7.8 -#include <sys/zio_compress.h>
     7.9  #include <sys/zil.h>
    7.10 +#include <sys/zil_impl.h>
    7.11  #include <sys/vdev_impl.h>
    7.12  #include <sys/vdev_file.h>
    7.13  #include <sys/spa_impl.h>
    7.14 @@ -106,6 +105,7 @@
    7.15  #include <ctype.h>
    7.16  #include <math.h>
    7.17  #include <sys/fs/zfs.h>
    7.18 +#include <libnvpair.h>
    7.19  
    7.20  static char cmdname[] = "ztest";
    7.21  static char *zopt_pool = cmdname;
    7.22 @@ -127,112 +127,171 @@
    7.23  static uint64_t zopt_time = 300;	/* 5 minutes */
    7.24  static int zopt_maxfaults;
    7.25  
    7.26 +#define	BT_MAGIC	0x123456789abcdefULL
    7.27 +
    7.28 +enum ztest_io_type {
    7.29 +	ZTEST_IO_WRITE_TAG,
    7.30 +	ZTEST_IO_WRITE_PATTERN,
    7.31 +	ZTEST_IO_WRITE_ZEROES,
    7.32 +	ZTEST_IO_TRUNCATE,
    7.33 +	ZTEST_IO_SETATTR,
    7.34 +	ZTEST_IO_TYPES
    7.35 +};
    7.36 +
    7.37  typedef struct ztest_block_tag {
    7.38 +	uint64_t	bt_magic;
    7.39  	uint64_t	bt_objset;
    7.40  	uint64_t	bt_object;
    7.41  	uint64_t	bt_offset;
    7.42 +	uint64_t	bt_gen;
    7.43  	uint64_t	bt_txg;
    7.44 -	uint64_t	bt_thread;
    7.45 -	uint64_t	bt_seq;
    7.46 +	uint64_t	bt_crtxg;
    7.47  } ztest_block_tag_t;
    7.48  
    7.49 -typedef struct ztest_args {
    7.50 -	char		za_pool[MAXNAMELEN];
    7.51 -	spa_t		*za_spa;
    7.52 -	objset_t	*za_os;
    7.53 -	zilog_t		*za_zilog;
    7.54 -	thread_t	za_thread;
    7.55 -	uint64_t	za_instance;
    7.56 -	uint64_t	za_random;
    7.57 -	uint64_t	za_diroff;
    7.58 -	uint64_t	za_diroff_shared;
    7.59 -	uint64_t	za_zil_seq;
    7.60 -	hrtime_t	za_start;
    7.61 -	hrtime_t	za_stop;
    7.62 -	hrtime_t	za_kill;
    7.63 -	/*
    7.64 -	 * Thread-local variables can go here to aid debugging.
    7.65 -	 */
    7.66 -	ztest_block_tag_t za_rbt;
    7.67 -	ztest_block_tag_t za_wbt;
    7.68 -	dmu_object_info_t za_doi;
    7.69 -	dmu_buf_t	*za_dbuf;
    7.70 -} ztest_args_t;
    7.71 -
    7.72 -typedef void ztest_func_t(ztest_args_t *);
    7.73 +typedef struct bufwad {
    7.74 +	uint64_t	bw_index;
    7.75 +	uint64_t	bw_txg;
    7.76 +	uint64_t	bw_data;
    7.77 +} bufwad_t;
    7.78 +
    7.79 +/*
    7.80 + * XXX -- fix zfs range locks to be generic so we can use them here.
    7.81 + */
    7.82 +typedef enum {
    7.83 +	RL_READER,
    7.84 +	RL_WRITER,
    7.85 +	RL_APPEND
    7.86 +} rl_type_t;
    7.87 +
    7.88 +typedef struct rll {
    7.89 +	void		*rll_writer;
    7.90 +	int		rll_readers;
    7.91 +	mutex_t		rll_lock;
    7.92 +	cond_t		rll_cv;
    7.93 +} rll_t;
    7.94 +
    7.95 +typedef struct rl {
    7.96 +	uint64_t	rl_object;
    7.97 +	uint64_t	rl_offset;
    7.98 +	uint64_t	rl_size;
    7.99 +	rll_t		*rl_lock;
   7.100 +} rl_t;
   7.101 +
   7.102 +#define	ZTEST_RANGE_LOCKS	64
   7.103 +#define	ZTEST_OBJECT_LOCKS	64
   7.104 +
   7.105 +/*
   7.106 + * Object descriptor.  Used as a template for object lookup/create/remove.
   7.107 + */
   7.108 +typedef struct ztest_od {
   7.109 +	uint64_t	od_dir;
   7.110 +	uint64_t	od_object;
   7.111 +	dmu_object_type_t od_type;
   7.112 +	dmu_object_type_t od_crtype;
   7.113 +	uint64_t	od_blocksize;
   7.114 +	uint64_t	od_crblocksize;
   7.115 +	uint64_t	od_gen;
   7.116 +	uint64_t	od_crgen;
   7.117 +	char		od_name[MAXNAMELEN];
   7.118 +} ztest_od_t;
   7.119 +
   7.120 +/*
   7.121 + * Per-dataset state.
   7.122 + */
   7.123 +typedef struct ztest_ds {
   7.124 +	objset_t	*zd_os;
   7.125 +	zilog_t		*zd_zilog;
   7.126 +	uint64_t	zd_seq;
   7.127 +	ztest_od_t	*zd_od;		/* debugging aid */
   7.128 +	char		zd_name[MAXNAMELEN];
   7.129 +	mutex_t		zd_dirobj_lock;
   7.130 +	rll_t		zd_object_lock[ZTEST_OBJECT_LOCKS];
   7.131 +	rll_t		zd_range_lock[ZTEST_RANGE_LOCKS];
   7.132 +} ztest_ds_t;
   7.133 +
   7.134 +/*
   7.135 + * Per-iteration state.
   7.136 + */
   7.137 +typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
   7.138 +
   7.139 +typedef struct ztest_info {
   7.140 +	ztest_func_t	*zi_func;	/* test function */
   7.141 +	uint64_t	zi_iters;	/* iterations per execution */
   7.142 +	uint64_t	*zi_interval;	/* execute every <interval> seconds */
   7.143 +	uint64_t	zi_call_count;	/* per-pass count */
   7.144 +	uint64_t	zi_call_time;	/* per-pass time */
   7.145 +	uint64_t	zi_call_next;	/* next time to call this function */
   7.146 +} ztest_info_t;
   7.147  
   7.148  /*
   7.149   * Note: these aren't static because we want dladdr() to work.
   7.150   */
   7.151  ztest_func_t ztest_dmu_read_write;
   7.152 -ztest_func_t ztest_dmu_read_write_zcopy;
   7.153  ztest_func_t ztest_dmu_write_parallel;
   7.154  ztest_func_t ztest_dmu_object_alloc_free;
   7.155  ztest_func_t ztest_dmu_commit_callbacks;
   7.156  ztest_func_t ztest_zap;
   7.157 +ztest_func_t ztest_zap_parallel;
   7.158 +ztest_func_t ztest_zil_commit;
   7.159 +ztest_func_t ztest_dmu_read_write_zcopy;
   7.160 +ztest_func_t ztest_dmu_objset_create_destroy;
   7.161 +ztest_func_t ztest_dmu_prealloc;
   7.162  ztest_func_t ztest_fzap;
   7.163 -ztest_func_t ztest_zap_parallel;
   7.164 -ztest_func_t ztest_traverse;
   7.165 +ztest_func_t ztest_dmu_snapshot_create_destroy;
   7.166  ztest_func_t ztest_dsl_prop_get_set;
   7.167 -ztest_func_t ztest_dmu_objset_create_destroy;
   7.168 -ztest_func_t ztest_dmu_snapshot_create_destroy;
   7.169 -ztest_func_t ztest_dsl_dataset_promote_busy;
   7.170 +ztest_func_t ztest_spa_prop_get_set;
   7.171  ztest_func_t ztest_spa_create_destroy;
   7.172  ztest_func_t ztest_fault_inject;
   7.173 +ztest_func_t ztest_ddt_repair;
   7.174 +ztest_func_t ztest_dmu_snapshot_hold;
   7.175  ztest_func_t ztest_spa_rename;
   7.176 +ztest_func_t ztest_scrub;
   7.177 +ztest_func_t ztest_dsl_dataset_promote_busy;
   7.178  ztest_func_t ztest_vdev_attach_detach;
   7.179  ztest_func_t ztest_vdev_LUN_growth;
   7.180  ztest_func_t ztest_vdev_add_remove;
   7.181  ztest_func_t ztest_vdev_aux_add_remove;
   7.182 -ztest_func_t ztest_scrub;
   7.183 -ztest_func_t ztest_dmu_snapshot_hold;
   7.184 -
   7.185 -typedef struct ztest_info {
   7.186 -	ztest_func_t	*zi_func;	/* test function */
   7.187 -	uint64_t	zi_iters;	/* iterations per execution */
   7.188 -	uint64_t	*zi_interval;	/* execute every <interval> seconds */
   7.189 -	uint64_t	zi_calls;	/* per-pass count */
   7.190 -	uint64_t	zi_call_time;	/* per-pass time */
   7.191 -	uint64_t	zi_call_total;	/* cumulative total */
   7.192 -	uint64_t	zi_call_target;	/* target cumulative total */
   7.193 -} ztest_info_t;
   7.194 -
   7.195 -uint64_t zopt_always = 0;		/* all the time */
   7.196 -uint64_t zopt_often = 1;		/* every second */
   7.197 -uint64_t zopt_sometimes = 10;		/* every 10 seconds */
   7.198 -uint64_t zopt_rarely = 60;		/* every 60 seconds */
   7.199 +
   7.200 +uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
   7.201 +uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
   7.202 +uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
   7.203 +uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
   7.204 +uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
   7.205  
   7.206  ztest_info_t ztest_info[] = {
   7.207  	{ ztest_dmu_read_write,			1,	&zopt_always	},
   7.208 -	{ ztest_dmu_write_parallel,		30,	&zopt_always	},
   7.209 +	{ ztest_dmu_write_parallel,		10,	&zopt_always	},
   7.210  	{ ztest_dmu_object_alloc_free,		1,	&zopt_always	},
   7.211 -	{ ztest_dmu_commit_callbacks,		10,	&zopt_always	},
   7.212 +	{ ztest_dmu_commit_callbacks,		1,	&zopt_always	},
   7.213  	{ ztest_zap,				30,	&zopt_always	},
   7.214 -	{ ztest_fzap,				1,	&zopt_always	},
   7.215  	{ ztest_zap_parallel,			100,	&zopt_always	},
   7.216 -	{ ztest_dmu_read_write_zcopy,		1,	&zopt_sometimes	},
   7.217 -	{ ztest_dsl_prop_get_set,		1,	&zopt_sometimes	},
   7.218 -	{ ztest_dmu_objset_create_destroy,	1,	&zopt_sometimes },
   7.219 -	{ ztest_dmu_snapshot_create_destroy,	1,	&zopt_sometimes },
   7.220 -	{ ztest_spa_create_destroy,		1,	&zopt_sometimes },
   7.221 +	{ ztest_zil_commit,			1,	&zopt_incessant	},
   7.222 +	{ ztest_dmu_read_write_zcopy,		1,	&zopt_often	},
   7.223 +	{ ztest_dmu_objset_create_destroy,	1,	&zopt_often	},
   7.224 +	{ ztest_dsl_prop_get_set,		1,	&zopt_often	},
   7.225 +	{ ztest_spa_prop_get_set,		1,	&zopt_sometimes	},
   7.226 +#if 0
   7.227 +	{ ztest_dmu_prealloc,			1,	&zopt_sometimes	},
   7.228 +#endif
   7.229 +	{ ztest_fzap,				1,	&zopt_sometimes	},
   7.230 +	{ ztest_dmu_snapshot_create_destroy,	1,	&zopt_sometimes	},
   7.231 +	{ ztest_spa_create_destroy,		1,	&zopt_sometimes	},
   7.232  	{ ztest_fault_inject,			1,	&zopt_sometimes	},
   7.233 +	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
   7.234  	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
   7.235  	{ ztest_spa_rename,			1,	&zopt_rarely	},
   7.236 +	{ ztest_scrub,				1,	&zopt_rarely	},
   7.237 +	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
   7.238  	{ ztest_vdev_attach_detach,		1,	&zopt_rarely	},
   7.239  	{ ztest_vdev_LUN_growth,		1,	&zopt_rarely	},
   7.240 -	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
   7.241  	{ ztest_vdev_add_remove,		1,	&zopt_vdevtime	},
   7.242  	{ ztest_vdev_aux_add_remove,		1,	&zopt_vdevtime	},
   7.243 -	{ ztest_scrub,				1,	&zopt_vdevtime	},
   7.244  };
   7.245  
   7.246  #define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
   7.247  
   7.248 -#define	ZTEST_SYNC_LOCKS	16
   7.249 -
   7.250  /*
   7.251   * The following struct is used to hold a list of uncalled commit callbacks.
   7.252 - *
   7.253   * The callbacks are ordered by txg number.
   7.254   */
   7.255  typedef struct ztest_cb_list {
   7.256 @@ -244,28 +303,34 @@
   7.257   * Stuff we need to share writably between parent and child.
   7.258   */
   7.259  typedef struct ztest_shared {
   7.260 +	char		*zs_pool;
   7.261 +	spa_t		*zs_spa;
   7.262 +	hrtime_t	zs_proc_start;
   7.263 +	hrtime_t	zs_proc_stop;
   7.264 +	hrtime_t	zs_thread_start;
   7.265 +	hrtime_t	zs_thread_stop;
   7.266 +	hrtime_t	zs_thread_kill;
   7.267 +	uint64_t	zs_enospc_count;
   7.268 +	uint64_t	zs_vdev_next_leaf;
   7.269 +	uint64_t	zs_vdev_aux;
   7.270 +	uint64_t	zs_alloc;
   7.271 +	uint64_t	zs_space;
   7.272  	mutex_t		zs_vdev_lock;
   7.273  	rwlock_t	zs_name_lock;
   7.274 -	uint64_t	zs_vdev_next_leaf;
   7.275 -	uint64_t	zs_vdev_aux;
   7.276 -	uint64_t	zs_enospc_count;
   7.277 -	hrtime_t	zs_start_time;
   7.278 -	hrtime_t	zs_stop_time;
   7.279 -	uint64_t	zs_alloc;
   7.280 -	uint64_t	zs_space;
   7.281  	ztest_info_t	zs_info[ZTEST_FUNCS];
   7.282 -	mutex_t		zs_sync_lock[ZTEST_SYNC_LOCKS];
   7.283 -	uint64_t	zs_seq[ZTEST_SYNC_LOCKS];
   7.284 +	ztest_ds_t	zs_zd[];
   7.285  } ztest_shared_t;
   7.286 +
   7.287 +#define	ID_PARALLEL	-1ULL
   7.288  
   7.289  static char ztest_dev_template[] = "%s/%s.%llua";
   7.290  static char ztest_aux_template[] = "%s/%s.%s.%llu";
   7.291 -static ztest_shared_t *ztest_shared;
   7.292 +ztest_shared_t *ztest_shared;
   7.293 +uint64_t *ztest_seq;
   7.294  
   7.295  static int ztest_random_fd;
   7.296  static int ztest_dump_core = 1;
   7.297  
   7.298 -static uint64_t metaslab_sz;
   7.299  static boolean_t ztest_exiting;
   7.300  
   7.301  /* Global commit callback list */
   7.302 @@ -273,13 +338,13 @@
   7.303  
   /*
    * The two extern variables are defined outside this file.
    *
    * This change removes the fixed object-number #defines (ZTEST_DIROBJ,
    * ZTEST_MICROZAP_OBJ, ZTEST_FATZAP_OBJ) and the ZTEST_DIROBJ_BLOCKSIZE /
    * ZTEST_DIRSIZE constants, moves metaslab_sz here, and introduces
    * enum ztest_object:
    *
    *   ZTEST_META_DNODE = 0
    *   ZTEST_DIROBJ     = 1  (same value the removed #define had)
    *   ZTEST_OBJECTS    = 2  (number of enumerators before it)
    */
   7.304  extern uint64_t metaslab_gang_bang;
   7.305  extern uint64_t metaslab_df_alloc_threshold;
   7.306 -
   7.307 -#define	ZTEST_DIROBJ		1
   7.308 -#define	ZTEST_MICROZAP_OBJ	2
   7.309 -#define	ZTEST_FATZAP_OBJ	3
   7.310 -
   7.311 -#define	ZTEST_DIROBJ_BLOCKSIZE	(1 << 10)
   7.312 -#define	ZTEST_DIRSIZE		256
   7.313 +static uint64_t metaslab_sz;
   7.314 +
   7.315 +enum ztest_object {
   7.316 +	ZTEST_META_DNODE = 0,
   7.317 +	ZTEST_DIROBJ,
   7.318 +	ZTEST_OBJECTS
   7.319 +};
   7.320  
   7.321  static void usage(boolean_t) __NORETURN;
   7.322  
   7.323 @@ -433,27 +498,6 @@
   7.324  	exit(requested ? 0 : 1);
   7.325  }
   7.326  
   7.327 -static uint64_t
   7.328 -ztest_random(uint64_t range)
   7.329 -{
   7.330 -	uint64_t r;
   7.331 -
   7.332 -	if (range == 0)
   7.333 -		return (0);
   7.334 -
   7.335 -	if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
   7.336 -		fatal(1, "short read from /dev/urandom");
   7.337 -
   7.338 -	return (r % range);
   7.339 -}
   7.340 -
   7.341 -/* ARGSUSED */
   7.342 -static void
   7.343 -ztest_record_enospc(char *s)
   7.344 -{
   7.345 -	ztest_shared->zs_enospc_count++;
   7.346 -}
   7.347 -
   7.348  static void
   7.349  process_options(int argc, char **argv)
   7.350  {
   7.351 @@ -546,8 +590,38 @@
   7.352  
   7.353  	zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
   7.354  
   7.355 -	zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
   7.356 +	zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time * NANOSEC / zopt_vdevs :
   7.357 +	    UINT64_MAX >> 2);
   7.358  	zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
   7.359 +}
   7.360 +
   /*
    * Snapshot space accounting into the shared structure, then kill the
    * calling process.
    *
    * zs_alloc and zs_space receive the results of metaslab_class_get_alloc()
    * and metaslab_class_get_space() for spa_normal_class(zs->zs_spa).  The
    * process then sends itself SIGKILL; the return value of kill() is
    * deliberately discarded.
    */
   7.361 +static void
   7.362 +ztest_kill(ztest_shared_t *zs)
   7.363 +{
   7.364 +	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(zs->zs_spa));
   7.365 +	zs->zs_space = metaslab_class_get_space(spa_normal_class(zs->zs_spa));
   7.366 +	(void) kill(getpid(), SIGKILL);
   7.367 +}
   7.368 +
   /*
    * Return a pseudo-random value in [0, range).
    *
    * A range of 0 returns 0 without reading anything.  Otherwise
    * sizeof (uint64_t) bytes are read from ztest_random_fd and reduced
    * modulo 'range'.  A short or failed read is fatal (the message names
    * /dev/urandom).
    *
    * NOTE(review): 'r % range' is slightly biased toward small values when
    * range does not divide 2^64; presumably acceptable for a test driver.
    */
   7.369 +static uint64_t
   7.370 +ztest_random(uint64_t range)
   7.371 +{
   7.372 +	uint64_t r;
   7.373 +
   7.374 +	if (range == 0)
   7.375 +		return (0);
   7.376 +
   7.377 +	if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
   7.378 +		fatal(1, "short read from /dev/urandom");
   7.379 +
   7.380 +	return (r % range);
   7.381 +}
   7.382 +
   /*
    * Count one out-of-space event in ztest_shared->zs_enospc_count.
    *
    * The argument 's' is not used by the body (hence ARGSUSED); callers in
    * this chunk pass FTAG or a caller-supplied tag.  This change only
    * const-qualifies the parameter.  The increment is a plain '++' with no
    * lock taken in this function.
    */
   7.383 +/* ARGSUSED */
   7.384 +static void
   7.385 +ztest_record_enospc(const char *s)
   7.386 +{
   7.387 +	ztest_shared->zs_enospc_count++;
   7.388  }
   7.389  
   7.390  static uint64_t
   7.391 @@ -687,100 +761,805 @@
   7.392  	return (root);
   7.393  }
   7.394  
   7.395 -static void
   7.396 -ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
   7.397 -{
   7.398 -	int bs = SPA_MINBLOCKSHIFT +
   7.399 -	    ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
   7.400 -	int ibs = DN_MIN_INDBLKSHIFT +
   7.401 -	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
   7.402 -	int error;
   7.403 -
   7.404 -	error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
   7.405 -	if (error) {
   7.406 -		char osname[300];
   7.407 -		dmu_objset_name(os, osname);
   7.408 -		fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
   7.409 -		    osname, object, 1 << bs, ibs, error);
   7.410 -	}
   7.411 -}
   7.412 -
   7.413 -static uint8_t
   7.414 -ztest_random_checksum(void)
   7.415 -{
   7.416 -	uint8_t checksum;
   /*
    * Return a random power-of-two block size.
    *
    * The shift is SPA_MINBLOCKSHIFT plus a ztest_random() value in
    * [0, SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT], so the result lies between
    * 1 << SPA_MINBLOCKSHIFT and 1 << SPA_MAXBLOCKSHIFT inclusive.
    */
   7.417 +static int
   7.418 +ztest_random_blocksize(void)
   7.419 +{
   7.420 +	return (1 << (SPA_MINBLOCKSHIFT +
   7.421 +	    ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)));
   7.422 +}
   7.423 +
   /*
    * Return a random indirect-block shift in
    * [DN_MIN_INDBLKSHIFT, DN_MAX_INDBLKSHIFT] (both ends inclusive), chosen
    * with ztest_random().  Unlike ztest_random_blocksize(), the shift itself
    * is returned, not 1 << shift.
    */
   7.424 +static int
   7.425 +ztest_random_ibshift(void)
   7.426 +{
   7.427 +	return (DN_MIN_INDBLKSHIFT +
   7.428 +	    ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1));
   7.429 +}
   7.430 +
   /*
    * Pick a random top-level vdev and return its index among the root
    * vdev's children.
    *
    * The function asserts that spa_config_held(spa, SCL_ALL, RW_READER) is
    * non-zero, i.e. the caller is expected to hold the spa config lock.
    * Candidates are drawn with ztest_random(rvd->vdev_children) and redrawn
    * while the chosen child
    *   - is a hole (vdev_ishole),
    *   - is a log device and log_ok is false, or
    *   - has no metaslab group, or a group with no class.
    * There is no retry limit: if no child qualifies the loop does not
    * terminate.
    *
    * Reading note: the diff reuses the "do {" context line of the removed
    * ztest_random_checksum(); the '-' lines in the middle belong to removed
    * functions and are not part of this function.
    */
   7.431 +static uint64_t
   7.432 +ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
   7.433 +{
   7.434 +	uint64_t top;
   7.435 +	vdev_t *rvd = spa->spa_root_vdev;
   7.436 +	vdev_t *tvd;
   7.437 +
   7.438 +	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
   7.439  
   7.440  	do {
   7.441 -		checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
   7.442 -	} while (zio_checksum_table[checksum].ci_zbt);
   7.443 -
   7.444 -	if (checksum == ZIO_CHECKSUM_OFF)
   7.445 -		checksum = ZIO_CHECKSUM_ON;
   7.446 -
   7.447 -	return (checksum);
   7.448 -}
   7.449 -
   7.450 -static uint8_t
   7.451 -ztest_random_compress(void)
   7.452 -{
   7.453 -	return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
   7.454 -}
   7.455 -
   7.456 -static int
   7.457 -ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
   7.458 -{
   7.459 -	dmu_tx_t *tx;
   7.460 -	int error;
   7.461 +		top = ztest_random(rvd->vdev_children);
   7.462 +		tvd = rvd->vdev_child[top];
   7.463 +	} while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
   7.464 +	    tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
   7.465 +
   7.466 +	return (top);
   7.467 +}
   7.468 +
   /*
    * Return a random value for dataset property 'prop'.
    *
    * Values come from zfs_prop_random_value(), seeded with
    * ztest_random(-1ULL).  For ZFS_PROP_CHECKSUM the draw is repeated until
    * the value is not ZIO_CHECKSUM_OFF, so this helper never returns
    * "checksum off".
    */
   7.469 +static uint64_t
   7.470 +ztest_random_dsl_prop(zfs_prop_t prop)
   7.471 +{
   7.472 +	uint64_t value;
   7.473 +
   7.474 +	do {
   7.475 +		value = zfs_prop_random_value(prop, ztest_random(-1ULL));
   7.476 +	} while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF);
   7.477 +
   7.478 +	return (value);
   7.479 +}
   7.480 +
   /*
    * Set (or inherit) a uint64 dataset property on 'osname' and read it
    * back.
    *
    * dsl_prop_set() is called with an integer size of sizeof (value) and a
    * count of 0 when 'inherit' is true, 1 otherwise.
    *
    * Returns ENOSPC (after recording it with ztest_record_enospc()) if the
    * set ran out of space.  Any other non-zero error trips ASSERT3U; where
    * that assertion is compiled out, the error is carried through to the
    * return value.  On success the property is re-read with dsl_prop_get()
    * (which must succeed) and, at verbosity >= 6, printed together with its
    * setpoint; the function then returns 0.
    */
   7.481 +static int
   7.482 +ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
   7.483 +    boolean_t inherit)
   7.484 +{
   7.485 +	const char *propname = zfs_prop_to_name(prop);
   7.486 +	const char *valname;
   7.487 +	char setpoint[MAXPATHLEN];
   7.488 +	uint64_t curval;
   7.489 +	int error;
   7.490 +
   7.491 +	error = dsl_prop_set(osname, propname, sizeof (value),
   7.492 +	    inherit ? 0 : 1, &value);
   7.493 +
   7.494 +	if (error == ENOSPC) {
   7.495 +		ztest_record_enospc(FTAG);
   7.496 +		return (error);
   7.497 +	}
   7.498 +	ASSERT3U(error, ==, 0);
   7.499 +
   7.500 +	VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
   7.501 +	    1, &curval, setpoint), ==, 0);
   7.502 +
   7.503 +	if (zopt_verbose >= 6) {
   7.504 +		VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
   7.505 +		(void) printf("%s %s = %s at '%s'\n",
   7.506 +		    osname, propname, valname, setpoint);
   7.507 +	}
   7.508 +
   7.509 +	return (error);
   7.510 +}
   7.511 +
   /*
    * Compiled out by '#if 0': pool-level counterpart of
    * ztest_dsl_prop_set_uint64().
    *
    * It builds a one-entry nvlist { zpool_prop_to_name(prop) : value },
    * passes it to spa_prop_set() on zs->zs_spa, and frees the nvlist on
    * every path.  ENOSPC is recorded and returned; any other non-zero error
    * trips ASSERT3U.
    */
   7.512 +#if 0
   7.513 +static int
   7.514 +ztest_spa_prop_set_uint64(ztest_shared_t *zs, zpool_prop_t prop, uint64_t value)
   7.515 +{
   7.516 +	spa_t *spa = zs->zs_spa;
   7.517 +	nvlist_t *props = NULL;
   7.518 +	int error;
   7.519 +
   7.520 +	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
   7.521 +	VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
   7.522 +
   7.523 +	error = spa_prop_set(spa, props);
   7.524 +
   7.525 +	nvlist_free(props);
   7.526 +
   7.527 +	if (error == ENOSPC) {
   7.528 +		ztest_record_enospc(FTAG);
   7.529 +		return (error);
   7.530 +	}
   7.531 +	ASSERT3U(error, ==, 0);
   7.532 +
   7.533 +	return (error);
   7.534 +}
   7.535 +#endif
   7.536 +
   /*
    * Initialize a reader/writer lock (rll_t) to the unlocked state: no
    * writer, zero readers, and a freshly initialized mutex and condition
    * variable (both USYNC_THREAD).  Initialization failure is fatal via
    * VERIFY.
    */
   7.537 +static void
   7.538 +ztest_rll_init(rll_t *rll)
   7.539 +{
   7.540 +	rll->rll_writer = NULL;
   7.541 +	rll->rll_readers = 0;
   7.542 +	VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
   7.543 +	VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
   7.544 +}
   7.545 +
   /*
    * Tear down an rll_t.  The lock is asserted to be idle (no writer, no
    * readers) before its mutex and condition variable are destroyed;
    * destruction failure is fatal via VERIFY.
    */
   7.546 +static void
   7.547 +ztest_rll_destroy(rll_t *rll)
   7.548 +{
   7.549 +	ASSERT(rll->rll_writer == NULL);
   7.550 +	ASSERT(rll->rll_readers == 0);
   7.551 +	VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
   7.552 +	VERIFY(cond_destroy(&rll->rll_cv) == 0);
   7.553 +}
   7.554 +
   /*
    * Acquire 'rll' as a reader (type == RL_READER) or as a writer (any
    * other type), blocking on the condition variable until that is
    * possible.
    *
    * A reader waits only while a writer holds the lock, then bumps
    * rll_readers.  A writer waits while there is a writer or any reader,
    * then records curthread in rll_writer.  Because readers only check
    * rll_writer, new readers can keep entering while a writer is waiting.
    * The return value of cond_wait() is ignored; the predicate is rechecked
    * in a loop.
    */
   7.555 +static void
   7.556 +ztest_rll_lock(rll_t *rll, rl_type_t type)
   7.557 +{
   7.558 +	VERIFY(mutex_lock(&rll->rll_lock) == 0);
   7.559 +
   7.560 +	if (type == RL_READER) {
   7.561 +		while (rll->rll_writer != NULL)
   7.562 +			(void) cond_wait(&rll->rll_cv, &rll->rll_lock);
   7.563 +		rll->rll_readers++;
   7.564 +	} else {
   7.565 +		while (rll->rll_writer != NULL || rll->rll_readers)
   7.566 +			(void) cond_wait(&rll->rll_cv, &rll->rll_lock);
   7.567 +		rll->rll_writer = curthread;
   7.568 +	}
   7.569 +
   7.570 +	VERIFY(mutex_unlock(&rll->rll_lock) == 0);
   7.571 +}
   7.572 +
   /*
    * Release one hold on 'rll'.  The mode is inferred from the lock state
    * rather than passed in: if a writer is recorded, the write hold is
    * dropped; otherwise one reader hold is dropped.  The function does not
    * check that the caller is the thread stored in rll_writer.
    *
    * Waiters are woken with cond_broadcast() only when the lock has become
    * completely free (no writer and no readers).
    */
   7.573 +static void
   7.574 +ztest_rll_unlock(rll_t *rll)
   7.575 +{
   7.576 +	VERIFY(mutex_lock(&rll->rll_lock) == 0);
   7.577 +
   7.578 +	if (rll->rll_writer) {
   7.579 +		ASSERT(rll->rll_readers == 0);
   7.580 +		rll->rll_writer = NULL;
   7.581 +	} else {
   7.582 +		ASSERT(rll->rll_readers != 0);
   7.583 +		ASSERT(rll->rll_writer == NULL);
   7.584 +		rll->rll_readers--;
   7.585 +	}
   7.586 +
   7.587 +	if (rll->rll_writer == NULL && rll->rll_readers == 0)
   7.588 +		VERIFY(cond_broadcast(&rll->rll_cv) == 0);
   7.589 +
   7.590 +	VERIFY(mutex_unlock(&rll->rll_lock) == 0);
   7.591 +}
   7.592 +
   /*
    * Take the per-dataset object lock that 'object' maps to, in the given
    * mode.
    *
    * The lock is zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)], so
    * distinct objects can share a lock.  Assumes ZTEST_OBJECT_LOCKS is a
    * power of two so the mask covers every slot - TODO confirm against its
    * definition.
    */
   7.593 +static void
   7.594 +ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
   7.595 +{
   7.596 +	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
   7.597 +
   7.598 +	ztest_rll_lock(rll, type);
   7.599 +}
   7.600 +
   /*
    * Release the object lock for 'object'.  The slot is computed with the
    * same mask as in ztest_object_lock(), so the same 'object' value must be
    * passed to both.
    */
   7.601 +static void
   7.602 +ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
   7.603 +{
   7.604 +	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
   7.605 +
   7.606 +	ztest_rll_unlock(rll);
   7.607 +}
   7.608 +
   /*
    * Lock the range-lock bucket for (object, offset) in the given mode and
    * return a handle describing the request.
    *
    * The bucket index is
    *   (object ^ (offset % (ZTEST_RANGE_LOCKS + 1))) & (ZTEST_RANGE_LOCKS - 1)
    * so 'size' does not influence which lock is taken; it is only recorded
    * in the handle.  Unrelated ranges that hash to the same bucket serialize
    * against each other.
    *
    * The handle is allocated with umem_alloc(UMEM_NOFAIL) before the lock is
    * taken and must be passed to ztest_range_unlock(), which frees it.
    */
   7.609 +static rl_t *
   7.610 +ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
   7.611 +    uint64_t size, rl_type_t type)
   7.612 +{
   7.613 +	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
   7.614 +	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
   7.615 +	rl_t *rl;
   7.616 +
   7.617 +	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
   7.618 +	rl->rl_object = object;
   7.619 +	rl->rl_offset = offset;
   7.620 +	rl->rl_size = size;
   7.621 +	rl->rl_lock = rll;
   7.622 +
   7.623 +	ztest_rll_lock(rll, type);
   7.624 +
   7.625 +	return (rl);
   7.626 +}
   7.627 +
   /*
    * Release a range lock obtained from ztest_range_lock(): unlock the
    * bucket stored in the handle, then free the handle.  'rl' must not be
    * used after this call.
    */
   7.628 +static void
   7.629 +ztest_range_unlock(rl_t *rl)
   7.630 +{
   7.631 +	rll_t *rll = rl->rl_lock;
   7.632 +
   7.633 +	ztest_rll_unlock(rll);
   7.634 +
   7.635 +	umem_free(rl, sizeof (*rl));
   7.636 +}
   7.637 +
   /*
    * Initialize per-dataset test state 'zd' for objset 'os'.
    *
    * Stores the objset and its ZIL handle (dmu_objset_zil()), resets zd_seq
    * to 0, copies the objset name into zd_name, and initializes the
    * directory-object mutex plus all ZTEST_OBJECT_LOCKS object locks and
    * ZTEST_RANGE_LOCKS range locks.  Pair with ztest_zd_fini().
    */
   7.638 +static void
   7.639 +ztest_zd_init(ztest_ds_t *zd, objset_t *os)
   7.640 +{
   7.641 +	zd->zd_os = os;
   7.642 +	zd->zd_zilog = dmu_objset_zil(os);
   7.643 +	zd->zd_seq = 0;
   7.644 +	dmu_objset_name(os, zd->zd_name);
   7.645 +
   7.646 +	VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
   7.647 +
   7.648 +	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
   7.649 +		ztest_rll_init(&zd->zd_object_lock[l]);
   7.650 +
   7.651 +	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
   7.652 +		ztest_rll_init(&zd->zd_range_lock[l]);
   7.653 +}
   7.654 +
   /*
    * Destroy the synchronization objects created by ztest_zd_init(): the
    * directory-object mutex and every object and range lock.  Each
    * ztest_rll_destroy() asserts that its lock is idle.  The other zd
    * fields (zd_os, zd_zilog, zd_name, zd_seq) are left untouched.
    */
   7.655 +static void
   7.656 +ztest_zd_fini(ztest_ds_t *zd)
   7.657 +{
   7.658 +	VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
   7.659 +
   7.660 +	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
   7.661 +		ztest_rll_destroy(&zd->zd_object_lock[l]);
   7.662 +
   7.663 +	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
   7.664 +		ztest_rll_destroy(&zd->zd_range_lock[l]);
   7.665 +}
   7.666 +
   /*
    * Randomized txg_how argument: evaluates to TXG_NOWAIT when
    * ztest_random(10) returns 0 (about one evaluation in ten), otherwise to
    * TXG_WAIT.  Every evaluation calls ztest_random(), so the result can
    * differ between uses.
    */
   7.667 +#define	TXG_MIGHTWAIT	(ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
   7.668 +
   /*
    * Assign 'tx' to a transaction group and return that txg, or 0 on
    * failure.
    *
    * On failure the tx is always aborted, so the caller must not use it
    * again:
    *   - ERESTART (asserted to happen only with TXG_NOWAIT): wait with
    *     dmu_tx_wait() first, then abort; the caller may retry with a new
    *     tx.
    *   - anything else is asserted to be ENOSPC and is recorded against
    *     'tag' with ztest_record_enospc().
    * On success the returned txg is asserted to be non-zero, which keeps 0
    * usable as the failure indicator.
    */
   7.669 +static uint64_t
   7.670 +ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
   7.671 +{
   7.672 +	uint64_t txg;
   7.673 +	int error;
   7.674 +
   7.675 +	/*
   7.676 +	 * Attempt to assign tx to some transaction group.
   7.677 +	 */
   7.678 +	error = dmu_tx_assign(tx, txg_how);
   7.679 +	if (error) {
   7.680 +		if (error == ERESTART) {
   7.681 +			ASSERT(txg_how == TXG_NOWAIT);
   7.682 +			dmu_tx_wait(tx);
   7.683 +		} else {
   7.684 +			ASSERT3U(error, ==, ENOSPC);
   7.685 +			ztest_record_enospc(tag);
   7.686 +		}
   7.687 +		dmu_tx_abort(tx);
   7.688 +		return (0);
   7.689 +	}
   7.690 +	txg = dmu_tx_get_txg(tx);
   7.691 +	ASSERT(txg != 0);
   7.692 +	return (txg);
   7.693 +}
   7.694 +
   /*
    * Fill 'buf' with the 64-bit word 'value', one uint64_t at a time, up to
    * buf + size.
    *
    * Assumes 'size' is a multiple of sizeof (uint64_t) and 'buf' is suitably
    * aligned: the loop tests 'ip < ip_end' before each 8-byte store, so a
    * size that is not a multiple of 8 makes the last store run past
    * buf + size.
    */
   7.695 +static void
   7.696 +ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
   7.697 +{
   7.698 +	uint64_t *ip = buf;
   7.699 +	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
   7.700 +
   7.701 +	while (ip < ip_end)
   7.702 +		*ip++ = value;
   7.703 +}
   7.704 +
   /*
    * Return B_TRUE-like non-zero iff every uint64_t word in
    * [buf, buf + size) equals 'value'.
    *
    * Differences are OR-ed into 'diff' and tested once at the end, so the
    * whole buffer is always scanned (no early exit on the first mismatch).
    * A size of 0 yields a match.  As with ztest_pattern_set(), 'size' is
    * assumed to be a multiple of sizeof (uint64_t); otherwise the last load
    * reads past buf + size.
    */
   7.705 +static boolean_t
   7.706 +ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
   7.707 +{
   7.708 +	uint64_t *ip = buf;
   7.709 +	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
   7.710 +	uint64_t diff = 0;
   7.711 +
   7.712 +	while (ip < ip_end)
   7.713 +		diff |= (value - *ip++);
   7.714 +
   7.715 +	return (diff == 0);
   7.716 +}
   7.717 +
   /*
    * Fill in block tag 'bt': the BT_MAGIC marker, the objset id obtained
    * from dmu_objset_id(os), and the caller-supplied object, offset, gen,
    * txg and crtxg values.  All seven assigned fields are overwritten
    * unconditionally.
    */
   7.718 +static void
   7.719 +ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
   7.720 +    uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
   7.721 +{
   7.722 +	bt->bt_magic = BT_MAGIC;
   7.723 +	bt->bt_objset = dmu_objset_id(os);
   7.724 +	bt->bt_object = object;
   7.725 +	bt->bt_offset = offset;
   7.726 +	bt->bt_gen = gen;
   7.727 +	bt->bt_txg = txg;
   7.728 +	bt->bt_crtxg = crtxg;
   7.729 +}
   7.730 +
   7.731 +static void
   7.732 +ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
   7.733 +    uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
   7.734 +{
   7.735 +	ASSERT(bt->bt_magic == BT_MAGIC);
   7.736