1 --- a/usr/src/cmd/filebench/Makefile.com Fri Oct 30 18:47:17 2009 -0600
2 +++ b/usr/src/cmd/filebench/Makefile.com Sun Nov 01 14:14:46 2009 -0800
3 @@ -51,9 +51,9 @@
4 ROOTFBBINDIR = $(ROOT)/usr/benchmarks/filebench/bin
5 OBJS = $(SRCS:%.c=%.o) parser_gram.o parser_lex.o
6 LINTFLAGS += -erroff=E_FUNC_ARG_UNUSED -erroff=E_NAME_DEF_NOT_USED2 \
7 - -erroff=E_NAME_USED_NOT_DEF2
8 + -erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_DECL2
9 LINTFLAGS64 += -erroff=E_FUNC_ARG_UNUSED -erroff=E_NAME_DEF_NOT_USED2 \
10 - -erroff=E_NAME_USED_NOT_DEF2
11 + -erroff=E_NAME_USED_NOT_DEF2 -erroff=E_INCONS_ARG_DECL2
12 LINTFILES = $(SRCS:%.c=%.ln)
13 CLEANFILES += parser_gram.c parser_gram.h parser_lex.c y.tab.h y.tab.c
14
1.1 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c Fri Oct 30 18:47:17 2009 -0600
1.2 +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c Sun Nov 01 14:14:46 2009 -0800
1.3 @@ -35,7 +35,6 @@
1.4 #include <sys/list.h>
1.5 #include <sys/spa_impl.h>
1.6 #include <sys/vdev_impl.h>
1.7 -#include <sys/zio_compress.h>
1.8 #include <ctype.h>
1.9
1.10 #ifndef _KERNEL
1.11 @@ -47,15 +46,6 @@
1.12 #else
1.13 #define ZFS_OBJ_NAME "libzpool.so.1"
1.14 #endif
1.15 -
1.16 -static char *
1.17 -local_strdup(const char *s)
1.18 -{
1.19 - char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
1.20 -
1.21 - (void) strcpy(s1, s);
1.22 - return (s1);
1.23 -}
1.24
1.25 static int
1.26 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
1.27 @@ -128,27 +118,6 @@
1.28 off /= 8;
1.29
1.30 return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
1.31 -}
1.32 -
1.33 -static int
1.34 -read_symbol(char *sym_name, void **bufp)
1.35 -{
1.36 - GElf_Sym sym;
1.37 -
1.38 - if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
1.39 - mdb_warn("can't find symbol %s", sym_name);
1.40 - return (DCMD_ERR);
1.41 - }
1.42 -
1.43 - *bufp = mdb_alloc(sym.st_size, UM_SLEEP);
1.44 -
1.45 - if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
1.46 - mdb_warn("can't read data for symbol %s", sym_name);
1.47 - mdb_free(*bufp, sym.st_size);
1.48 - return (DCMD_ERR);
1.49 - }
1.50 -
1.51 - return (DCMD_OK);
1.52 }
1.53
1.54 static int verbose;
1.55 @@ -305,30 +274,6 @@
1.56
1.57 /* ARGSUSED */
1.58 static int
1.59 -zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1.60 -{
1.61 - mdb_ctf_id_t pipe_enum;
1.62 - int i;
1.63 - char stage[1024];
1.64 -
1.65 - if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
1.66 - mdb_warn("Could not find enum zio_stage");
1.67 - return (DCMD_ERR);
1.68 - }
1.69 -
1.70 - for (i = 0; i < 32; i++) {
1.71 - if (addr & (1U << i)) {
1.72 - enum_lookup(stage, sizeof (stage), pipe_enum, i,
1.73 - "ZIO_STAGE_");
1.74 - mdb_printf(" %s\n", stage);
1.75 - }
1.76 - }
1.77 -
1.78 - return (DCMD_OK);
1.79 -}
1.80 -
1.81 -/* ARGSUSED */
1.82 -static int
1.83 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1.84 {
1.85 /*
1.86 @@ -351,9 +296,8 @@
1.87 "metaslab_aliquot",
1.88 "reference_tracking_enable",
1.89 "reference_history",
1.90 - "zio_taskq_threads",
1.91 "spa_max_replication_override",
1.92 - "spa_mode",
1.93 + "spa_mode_global",
1.94 "zfs_flags",
1.95 "zfs_txg_synctime",
1.96 "zfs_txg_timeout",
1.97 @@ -383,9 +327,8 @@
1.98 "zio_injection_enabled",
1.99 "zvol_immediate_write_sz",
1.100 };
1.101 - int i;
1.102
1.103 - for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
1.104 + for (int i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
1.105 int sz;
1.106 uint64_t val64;
1.107 uint32_t *val32p = (uint32_t *)&val64;
1.108 @@ -407,76 +350,33 @@
1.109 static int
1.110 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1.111 {
1.112 - blkptr_t bp;
1.113 - dmu_object_type_info_t *doti;
1.114 - zio_compress_info_t *zct;
1.115 - zio_checksum_info_t *zci;
1.116 - int i;
1.117 - char buf[MAXPATHLEN];
1.118 + mdb_ctf_id_t type_enum, checksum_enum, compress_enum;
1.119 + char type[80], checksum[80], compress[80];
1.120 + blkptr_t blk, *bp = &blk;
1.121 + char buf[BP_SPRINTF_LEN];
1.122
1.123 - if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
1.124 + if (mdb_vread(&blk, sizeof (blkptr_t), addr) == -1) {
1.125 mdb_warn("failed to read blkptr_t");
1.126 return (DCMD_ERR);
1.127 }
1.128
1.129 - if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
1.130 + if (mdb_ctf_lookup_by_name("enum dmu_object_type", &type_enum) == -1 ||
1.131 + mdb_ctf_lookup_by_name("enum zio_checksum", &checksum_enum) == -1 ||
1.132 + mdb_ctf_lookup_by_name("enum zio_compress", &compress_enum) == -1) {
1.133 + mdb_warn("Could not find blkptr enumerated types");
1.134 return (DCMD_ERR);
1.135 - for (i = 0; i < DMU_OT_NUMTYPES; i++) {
1.136 - mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
1.137 - doti[i].ot_name = local_strdup(buf);
1.138 }
1.139
1.140 - if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
1.141 - return (DCMD_ERR);
1.142 - for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
1.143 - mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
1.144 - zci[i].ci_name = local_strdup(buf);
1.145 - }
1.146 + enum_lookup(type, sizeof (type), type_enum,
1.147 + BP_GET_TYPE(bp), "DMU_OT_");
1.148 + enum_lookup(checksum, sizeof (checksum), checksum_enum,
1.149 + BP_GET_CHECKSUM(bp), "ZIO_CHECKSUM_");
1.150 + enum_lookup(compress, sizeof (compress), compress_enum,
1.151 + BP_GET_COMPRESS(bp), "ZIO_COMPRESS_");
1.152
1.153 - if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
1.154 - return (DCMD_ERR);
1.155 - for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
1.156 - mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
1.157 - zct[i].ci_name = local_strdup(buf);
1.158 - }
1.159 + SPRINTF_BLKPTR(mdb_snprintf, '\n', buf, bp, type, checksum, compress);
1.160
1.161 - /*
1.162 - * Super-ick warning: This code is also duplicated in
1.163 - * cmd/zdb.c . Yeah, I hate code replication, too.
1.164 - */
1.165 - for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
1.166 - dva_t *dva = &bp.blk_dva[i];
1.167 -
1.168 - mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
1.169 - DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
1.170 - mdb_printf("DVA[%d]: GANG: %-5s GRID: %04x\t"
1.171 - "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
1.172 - (int)DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
1.173 - mdb_printf("DVA[%d]: %llu:%llx:%llx:%s%s%s%s\n", i,
1.174 - DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
1.175 - BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
1.176 - !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
1.177 - DVA_GET_GANG(dva) ? "g" : "",
1.178 - BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
1.179 - }
1.180 - mdb_printf("LSIZE: %-16llx\t\tPSIZE: %llx\n",
1.181 - BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
1.182 - mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
1.183 - BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
1.184 - BP_GET_TYPE(&bp) < DMU_OT_NUMTYPES ?
1.185 - doti[BP_GET_TYPE(&bp)].ot_name : "UNKNOWN");
1.186 - mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n",
1.187 - bp.blk_birth, (int)BP_GET_LEVEL(&bp), bp.blk_fill);
1.188 - mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n",
1.189 - BP_GET_CHECKSUM(&bp) < ZIO_CHECKSUM_FUNCTIONS ?
1.190 - zci[BP_GET_CHECKSUM(&bp)].ci_name : "UNKNOWN",
1.191 - BP_GET_COMPRESS(&bp) < ZIO_COMPRESS_FUNCTIONS ?
1.192 - zct[BP_GET_COMPRESS(&bp)].ci_name : "UNKNOWN");
1.193 - mdb_printf("CKSUM: %llx:%llx:%llx:%llx\n",
1.194 - bp.blk_cksum.zc_word[0],
1.195 - bp.blk_cksum.zc_word[1],
1.196 - bp.blk_cksum.zc_word[2],
1.197 - bp.blk_cksum.zc_word[3]);
1.198 + mdb_printf("%s\n", buf);
1.199
1.200 return (DCMD_OK);
1.201 }
1.202 @@ -2293,7 +2193,6 @@
1.203 "zio_t summary", zio_print },
1.204 { "zio_state", "?", "print out all zio_t structures on system or "
1.205 "for a particular pool", zio_state },
1.206 - { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
1.207 { "zfs_blkstats", ":[-v]",
1.208 "given a spa_t, print block type stats from last scrub",
1.209 zfs_blkstats },
2.1 --- a/usr/src/cmd/sgs/Makefile.var Fri Oct 30 18:47:17 2009 -0600
2.2 +++ b/usr/src/cmd/sgs/Makefile.var Sun Nov 01 14:14:46 2009 -0800
2.3 @@ -75,7 +75,7 @@
2.4 # the system.
2.5 #
2.6 VAR_AVLDIR= $(SRCBASE)/common/avl
2.7 -VAR_AVLINCDIR=
2.8 +VAR_AVLINCDIR= -I $(SRCBASE)/uts/common
2.9
2.10 #
2.11 # VAR_DTRDIR - directory to find dtrace_data.c in.
3.1 --- a/usr/src/cmd/zdb/Makefile.com Fri Oct 30 18:47:17 2009 -0600
3.2 +++ b/usr/src/cmd/zdb/Makefile.com Sun Nov 01 14:14:46 2009 -0800
3.3 @@ -33,6 +33,7 @@
3.4
3.5 INCS += -I../../../lib/libzpool/common
3.6 INCS += -I../../../uts/common/fs/zfs
3.7 +INCS += -I../../../common/zfs
3.8
3.9 LDLIBS += -lzpool -lumem -lnvpair -lzfs -lavl
3.10
4.1 --- a/usr/src/cmd/zdb/zdb.c Fri Oct 30 18:47:17 2009 -0600
4.2 +++ b/usr/src/cmd/zdb/zdb.c Sun Nov 01 14:14:46 2009 -0800
4.3 @@ -51,6 +51,7 @@
4.4 #include <sys/zio_compress.h>
4.5 #include <sys/zfs_fuid.h>
4.6 #include <sys/arc.h>
4.7 +#include <sys/ddt.h>
4.8 #undef ZFS_MAXNAMELEN
4.9 #undef verify
4.10 #include <libzfs.h>
4.11 @@ -72,8 +73,6 @@
4.12 uint64_t *zopt_object = NULL;
4.13 int zopt_objects = 0;
4.14 libzfs_handle_t *g_zfs;
4.15 -boolean_t zdb_sig_user_data = B_TRUE;
4.16 -int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
4.17
4.18 /*
4.19 * These libumem hooks provide a reasonable set of defaults for the allocator's
4.20 @@ -121,8 +120,7 @@
4.21 (void) fprintf(stderr, " -c checksum all metadata (twice for "
4.22 "all data) blocks\n");
4.23 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
4.24 - (void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
4.25 - "dump blkptr signatures\n");
4.26 + (void) fprintf(stderr, " -S simulate dedup to measure effect\n");
4.27 (void) fprintf(stderr, " -v verbose (applies to all others)\n");
4.28 (void) fprintf(stderr, " -l dump label contents\n");
4.29 (void) fprintf(stderr, " -L disable leak tracking (do not "
4.30 @@ -540,6 +538,198 @@
4.31 }
4.32
4.33 static void
4.34 +dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
4.35 +{
4.36 + const ddt_phys_t *ddp = dde->dde_phys;
4.37 + const ddt_key_t *ddk = &dde->dde_key;
4.38 + char *types[4] = { "ditto", "single", "double", "triple" };
4.39 + char blkbuf[BP_SPRINTF_LEN];
4.40 + blkptr_t blk;
4.41 +
4.42 + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
4.43 + if (ddp->ddp_phys_birth == 0)
4.44 + continue;
4.45 + ddt_bp_create(ddt, ddk, ddp, &blk);
4.46 + sprintf_blkptr(blkbuf, &blk);
4.47 + (void) printf("index %llx refcnt %llu %s %s\n",
4.48 + (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
4.49 + types[p], blkbuf);
4.50 + }
4.51 +}
4.52 +
4.53 +static void
4.54 +dump_dedup_ratio(const ddt_stat_t *dds)
4.55 +{
4.56 + double rL, rP, rD, D, dedup, compress, copies;
4.57 +
4.58 + if (dds->dds_blocks == 0)
4.59 + return;
4.60 +
4.61 + rL = (double)dds->dds_ref_lsize;
4.62 + rP = (double)dds->dds_ref_psize;
4.63 + rD = (double)dds->dds_ref_dsize;
4.64 + D = (double)dds->dds_dsize;
4.65 +
4.66 + dedup = rD / D;
4.67 + compress = rL / rP;
4.68 + copies = rD / rP;
4.69 +
4.70 + (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
4.71 + "dedup * compress / copies = %.2f\n\n",
4.72 + dedup, compress, copies, dedup * compress / copies);
4.73 +}
4.74 +
4.75 +static void
4.76 +dump_ddt_stat(const ddt_stat_t *dds, int h)
4.77 +{
4.78 + char refcnt[6];
4.79 + char blocks[6], lsize[6], psize[6], dsize[6];
4.80 + char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
4.81 +
4.82 + if (dds->dds_blocks == 0)
4.83 + return;
4.84 +
4.85 + if (h == -1)
4.86 + (void) strcpy(refcnt, "Total");
4.87 + else
4.88 + nicenum(1ULL << h, refcnt);
4.89 +
4.90 + nicenum(dds->dds_blocks, blocks);
4.91 + nicenum(dds->dds_lsize, lsize);
4.92 + nicenum(dds->dds_psize, psize);
4.93 + nicenum(dds->dds_dsize, dsize);
4.94 + nicenum(dds->dds_ref_blocks, ref_blocks);
4.95 + nicenum(dds->dds_ref_lsize, ref_lsize);
4.96 + nicenum(dds->dds_ref_psize, ref_psize);
4.97 + nicenum(dds->dds_ref_dsize, ref_dsize);
4.98 +
4.99 + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
4.100 + refcnt,
4.101 + blocks, lsize, psize, dsize,
4.102 + ref_blocks, ref_lsize, ref_psize, ref_dsize);
4.103 +}
4.104 +
4.105 +static void
4.106 +dump_ddt_histogram(const ddt_histogram_t *ddh)
4.107 +{
4.108 + ddt_stat_t dds_total = { 0 };
4.109 +
4.110 + ddt_histogram_stat(&dds_total, ddh);
4.111 +
4.112 + (void) printf("\n");
4.113 +
4.114 + (void) printf("bucket "
4.115 + " allocated "
4.116 + " referenced \n");
4.117 + (void) printf("______ "
4.118 + "______________________________ "
4.119 + "______________________________\n");
4.120 +
4.121 + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
4.122 + "refcnt",
4.123 + "blocks", "LSIZE", "PSIZE", "DSIZE",
4.124 + "blocks", "LSIZE", "PSIZE", "DSIZE");
4.125 +
4.126 + (void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
4.127 + "------",
4.128 + "------", "-----", "-----", "-----",
4.129 + "------", "-----", "-----", "-----");
4.130 +
4.131 + for (int h = 0; h < 64; h++)
4.132 + dump_ddt_stat(&ddh->ddh_stat[h], h);
4.133 +
4.134 + dump_ddt_stat(&dds_total, -1);
4.135 +
4.136 + (void) printf("\n");
4.137 +}
4.138 +
4.139 +static void
4.140 +dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
4.141 +{
4.142 + char name[DDT_NAMELEN];
4.143 + ddt_entry_t dde;
4.144 + uint64_t walk = 0;
4.145 + dmu_object_info_t doi;
4.146 + uint64_t count, dspace, mspace;
4.147 + int error;
4.148 +
4.149 + error = ddt_object_info(ddt, type, class, &doi);
4.150 +
4.151 + if (error == ENOENT)
4.152 + return;
4.153 + ASSERT(error == 0);
4.154 +
4.155 + count = ddt_object_count(ddt, type, class);
4.156 + dspace = doi.doi_physical_blocks_512 << 9;
4.157 + mspace = doi.doi_fill_count * doi.doi_data_block_size;
4.158 +
4.159 + ASSERT(count != 0); /* we should have destroyed it */
4.160 +
4.161 + ddt_object_name(ddt, type, class, name);
4.162 +
4.163 + (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
4.164 + name,
4.165 + (u_longlong_t)count,
4.166 + (u_longlong_t)(dspace / count),
4.167 + (u_longlong_t)(mspace / count));
4.168 +
4.169 + if (dump_opt['D'] < 3)
4.170 + return;
4.171 +
4.172 + dump_ddt_histogram(&ddt->ddt_histogram[type][class]);
4.173 +
4.174 + if (dump_opt['D'] < 4)
4.175 + return;
4.176 +
4.177 + if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
4.178 + return;
4.179 +
4.180 + (void) printf("%s contents:\n\n", name);
4.181 +
4.182 + while ((error = ddt_object_walk(ddt, type, class, &dde, &walk)) == 0)
4.183 + dump_dde(ddt, &dde, walk);
4.184 +
4.185 + ASSERT(error == ENOENT);
4.186 +
4.187 + (void) printf("\n");
4.188 +}
4.189 +
4.190 +static void
4.191 +dump_all_ddts(spa_t *spa)
4.192 +{
4.193 + ddt_histogram_t ddh_total = { 0 };
4.194 + ddt_stat_t dds_total = { 0 };
4.195 +
4.196 + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
4.197 + ddt_t *ddt = spa->spa_ddt[c];
4.198 + for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
4.199 + for (enum ddt_class class = 0; class < DDT_CLASSES;
4.200 + class++) {
4.201 + ddt_histogram_add(&ddh_total,
4.202 + &ddt->ddt_histogram[type][class]);
4.203 + dump_ddt(ddt, type, class);
4.204 + }
4.205 + }
4.206 + }
4.207 +
4.208 + ddt_histogram_stat(&dds_total, &ddh_total);
4.209 +
4.210 + if (dds_total.dds_blocks == 0) {
4.211 + (void) printf("All DDTs are empty\n");
4.212 + return;
4.213 + }
4.214 +
4.215 + (void) printf("\n");
4.216 +
4.217 + if (dump_opt['D'] > 1) {
4.218 + (void) printf("DDT histogram (aggregated over all DDTs):\n");
4.219 + dump_ddt_histogram(&ddh_total);
4.220 + }
4.221 +
4.222 + dump_dedup_ratio(&dds_total);
4.223 +}
4.224 +
4.225 +static void
4.226 dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size)
4.227 {
4.228 char *prefix = (void *)sm;
4.229 @@ -658,35 +848,48 @@
4.230 }
4.231
4.232 static uint64_t
4.233 -blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
4.234 +blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
4.235 {
4.236 - if (level < 0)
4.237 - return (blkid);
4.238 + if (dnp == NULL) {
4.239 + ASSERT(zb->zb_level < 0);
4.240 + if (zb->zb_object == 0)
4.241 + return (zb->zb_blkid);
4.242 + return (zb->zb_blkid * BP_GET_LSIZE(bp));
4.243 + }
4.244
4.245 - return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
4.246 + ASSERT(zb->zb_level >= 0);
4.247 +
4.248 + return ((zb->zb_blkid <<
4.249 + (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
4.250 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
4.251 }
4.252
4.253 static void
4.254 -sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
4.255 +sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp)
4.256 {
4.257 dva_t *dva = bp->blk_dva;
4.258 - int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
4.259 - int i;
4.260 + int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
4.261 +
4.262 + if (dump_opt['b'] >= 5) {
4.263 + sprintf_blkptr(blkbuf, bp);
4.264 + return;
4.265 + }
4.266
4.267 blkbuf[0] = '\0';
4.268
4.269 - for (i = 0; i < ndvas; i++)
4.270 + for (int i = 0; i < ndvas; i++)
4.271 (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
4.272 (u_longlong_t)DVA_GET_VDEV(&dva[i]),
4.273 (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
4.274 (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
4.275
4.276 - (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
4.277 + (void) sprintf(blkbuf + strlen(blkbuf),
4.278 + "%llxL/%llxP F=%llu B=%llu/%llu",
4.279 (u_longlong_t)BP_GET_LSIZE(bp),
4.280 (u_longlong_t)BP_GET_PSIZE(bp),
4.281 (u_longlong_t)bp->blk_fill,
4.282 - (u_longlong_t)bp->blk_birth);
4.283 + (u_longlong_t)bp->blk_birth,
4.284 + (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
4.285 }
4.286
4.287 static void
4.288 @@ -699,8 +902,7 @@
4.289 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
4.290 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
4.291
4.292 - (void) printf("%16llx ",
4.293 - (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
4.294 + (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
4.295
4.296 ASSERT(zb->zb_level >= 0);
4.297
4.298 @@ -712,16 +914,8 @@
4.299 }
4.300 }
4.301
4.302 - sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
4.303 + sprintf_blkptr_compact(blkbuf, bp);
4.304 (void) printf("%s\n", blkbuf);
4.305 -}
4.306 -
4.307 -#define SET_BOOKMARK(zb, objset, object, level, blkid) \
4.308 -{ \
4.309 - (zb)->zb_objset = objset; \
4.310 - (zb)->zb_object = object; \
4.311 - (zb)->zb_level = level; \
4.312 - (zb)->zb_blkid = blkid; \
4.313 }
4.314
4.315 static int
4.316 @@ -859,7 +1053,7 @@
4.317 nicenum(ds->ds_compressed_bytes, compressed);
4.318 nicenum(ds->ds_uncompressed_bytes, uncompressed);
4.319 nicenum(ds->ds_unique_bytes, unique);
4.320 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
4.321 + sprintf_blkptr(blkbuf, &ds->ds_bp);
4.322
4.323 (void) printf("\t\tdir_obj = %llu\n",
4.324 (u_longlong_t)ds->ds_dir_obj);
4.325 @@ -910,11 +1104,11 @@
4.326 if (dump_opt['d'] < 3)
4.327 return;
4.328
4.329 - mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
4.330 + bplist_init(&bpl);
4.331 VERIFY(0 == bplist_open(&bpl, mos, object));
4.332 if (bplist_empty(&bpl)) {
4.333 bplist_close(&bpl);
4.334 - mutex_destroy(&bpl.bpl_lock);
4.335 + bplist_fini(&bpl);
4.336 return;
4.337 }
4.338
4.339 @@ -932,7 +1126,7 @@
4.340
4.341 if (dump_opt['d'] < 5) {
4.342 bplist_close(&bpl);
4.343 - mutex_destroy(&bpl.bpl_lock);
4.344 + bplist_fini(&bpl);
4.345 return;
4.346 }
4.347
4.348 @@ -942,13 +1136,13 @@
4.349 char blkbuf[BP_SPRINTF_LEN];
4.350
4.351 ASSERT(bp->blk_birth != 0);
4.352 - sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
4.353 + sprintf_blkptr_compact(blkbuf, bp);
4.354 (void) printf("\tItem %3llu: %s\n",
4.355 (u_longlong_t)itor - 1, blkbuf);
4.356 }
4.357
4.358 bplist_close(&bpl);
4.359 - mutex_destroy(&bpl.bpl_lock);
4.360 + bplist_fini(&bpl);
4.361 }
4.362
4.363 static avl_tree_t idx_tree;
4.364 @@ -1107,6 +1301,8 @@
4.365 dump_zap, /* ZFS user/group used */
4.366 dump_zap, /* ZFS user/group quota */
4.367 dump_zap, /* snapshot refcount tags */
4.368 + dump_none, /* DDT ZAP object */
4.369 + dump_zap, /* DDT statistics */
4.370 dump_unknown /* Unknown type, must be last */
4.371 };
4.372
4.373 @@ -1118,13 +1314,14 @@
4.374 dnode_t *dn;
4.375 void *bonus = NULL;
4.376 size_t bsize = 0;
4.377 - char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
4.378 + char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7];
4.379 char aux[50];
4.380 int error;
4.381
4.382 if (*print_header) {
4.383 - (void) printf("\n Object lvl iblk dblk lsize"
4.384 - " asize type\n");
4.385 + (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
4.386 + "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
4.387 + "%full", "type");
4.388 *print_header = 0;
4.389 }
4.390
4.391 @@ -1143,10 +1340,11 @@
4.392
4.393 nicenum(doi.doi_metadata_block_size, iblk);
4.394 nicenum(doi.doi_data_block_size, dblk);
4.395 - nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
4.396 - lsize);
4.397 - nicenum(doi.doi_physical_blks << 9, asize);
4.398 + nicenum(doi.doi_max_offset, lsize);
4.399 + nicenum(doi.doi_physical_blocks_512 << 9, asize);
4.400 nicenum(doi.doi_bonus_size, bonus_size);
4.401 + (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
4.402 + doi.doi_data_block_size / doi.doi_max_offset);
4.403
4.404 aux[0] = '\0';
4.405
4.406 @@ -1160,13 +1358,13 @@
4.407 ZDB_COMPRESS_NAME(doi.doi_compress));
4.408 }
4.409
4.410 - (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
4.411 - (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
4.412 - asize, ZDB_OT_NAME(doi.doi_type), aux);
4.413 + (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
4.414 + (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
4.415 + asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
4.416
4.417 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
4.418 - (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
4.419 - "", "", "", "", bonus_size, "bonus",
4.420 + (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
4.421 + "", "", "", "", "", bonus_size, "bonus",
4.422 ZDB_OT_NAME(doi.doi_bonus_type));
4.423 }
4.424
4.425 @@ -1203,6 +1401,7 @@
4.426 }
4.427
4.428 for (;;) {
4.429 + char segsize[6];
4.430 error = dnode_next_offset(dn,
4.431 0, &start, minlvl, blkfill, 0);
4.432 if (error)
4.433 @@ -1261,8 +1460,7 @@
4.434
4.435 if (verbosity >= 4) {
4.436 (void) sprintf(blkbuf, ", rootbp ");
4.437 - (void) sprintf_blkptr(blkbuf + strlen(blkbuf),
4.438 - BP_SPRINTF_LEN - strlen(blkbuf), os->os_rootbp);
4.439 + (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
4.440 } else {
4.441 blkbuf[0] = '\0';
4.442 }
4.443 @@ -1275,7 +1473,16 @@
4.444 (u_longlong_t)dds.dds_creation_txg,
4.445 numbuf, (u_longlong_t)usedobjs, blkbuf);
4.446
4.447 - dump_intent_log(dmu_objset_zil(os));
4.448 + if (zopt_objects != 0) {
4.449 + for (i = 0; i < zopt_objects; i++)
4.450 + dump_object(os, zopt_object[i], verbosity,
4.451 + &print_header);
4.452 + (void) printf("\n");
4.453 + return;
4.454 + }
4.455 +
4.456 + if (dump_opt['i'] != 0 || verbosity >= 2)
4.457 + dump_intent_log(dmu_objset_zil(os));
4.458
4.459 if (dmu_objset_ds(os) != NULL)
4.460 dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
4.461 @@ -1286,14 +1493,6 @@
4.462
4.463 if (os->os_rootbp->blk_birth == 0)
4.464 return;
4.465 -
4.466 - if (zopt_objects != 0) {
4.467 - for (i = 0; i < zopt_objects; i++)
4.468 - dump_object(os, zopt_object[i], verbosity,
4.469 - &print_header);
4.470 - (void) printf("\n");
4.471 - return;
4.472 - }
4.473
4.474 dump_object(os, 0, verbosity, &print_header);
4.475 object_count = 0;
4.476 @@ -1333,7 +1532,7 @@
4.477 (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
4.478 if (dump_opt['u'] >= 3) {
4.479 char blkbuf[BP_SPRINTF_LEN];
4.480 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
4.481 + sprintf_blkptr(blkbuf, &ub->ub_rootbp);
4.482 (void) printf("\trootbp = %s\n", blkbuf);
4.483 }
4.484 (void) printf("\n");
4.485 @@ -1466,12 +1665,166 @@
4.486
4.487 error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os);
4.488 if (error) {
4.489 - (void) printf("Could not open %s\n", dsname);
4.490 + (void) printf("Could not open %s, error %d\n", dsname, error);
4.491 return (0);
4.492 }
4.493 dump_dir(os);
4.494 dmu_objset_disown(os, FTAG);
4.495 fuid_table_destroy();
4.496 + return (0);
4.497 +}
4.498 +
4.499 +/*
4.500 + * Block statistics.
4.501 + */
4.502 +typedef struct zdb_blkstats {
4.503 + uint64_t zb_asize;
4.504 + uint64_t zb_lsize;
4.505 + uint64_t zb_psize;
4.506 + uint64_t zb_count;
4.507 +} zdb_blkstats_t;
4.508 +
4.509 +/*
4.510 + * Extended object types to report deferred frees and dedup auto-ditto blocks.
4.511 + */
4.512 +#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
4.513 +#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
4.514 +#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2)
4.515 +
4.516 +static char *zdb_ot_extname[] = {
4.517 + "deferred free",
4.518 + "dedup ditto",
4.519 + "Total",
4.520 +};
4.521 +
4.522 +#define ZB_TOTAL DN_MAX_LEVELS
4.523 +
4.524 +typedef struct zdb_cb {
4.525 + zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
4.526 + uint64_t zcb_dedup_asize;
4.527 + uint64_t zcb_dedup_blocks;
4.528 + uint64_t zcb_errors[256];
4.529 + int zcb_readfails;
4.530 + int zcb_haderrors;
4.531 +} zdb_cb_t;
4.532 +
4.533 +static void
4.534 +zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp,
4.535 + dmu_object_type_t type)
4.536 +{
4.537 + uint64_t refcnt = 0;
4.538 +
4.539 + ASSERT(type < ZDB_OT_TOTAL);
4.540 +
4.541 + if (zilog && zil_bp_tree_add(zilog, bp) != 0)
4.542 + return;
4.543 +
4.544 + for (int i = 0; i < 4; i++) {
4.545 + int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
4.546 + int t = (i & 1) ? type : ZDB_OT_TOTAL;
4.547 + zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
4.548 +
4.549 + zb->zb_asize += BP_GET_ASIZE(bp);
4.550 + zb->zb_lsize += BP_GET_LSIZE(bp);
4.551 + zb->zb_psize += BP_GET_PSIZE(bp);
4.552 + zb->zb_count++;
4.553 + }
4.554 +
4.555 + if (dump_opt['L'])
4.556 + return;
4.557 +
4.558 + if (BP_GET_DEDUP(bp)) {
4.559 + ddt_t *ddt;
4.560 + ddt_entry_t *dde;
4.561 +
4.562 + ddt = ddt_select(spa, bp);
4.563 + ddt_enter(ddt);
4.564 + dde = ddt_lookup(ddt, bp, B_FALSE);
4.565 +
4.566 + if (dde == NULL) {
4.567 + refcnt = 0;
4.568 + } else {
4.569 + ddt_phys_t *ddp = ddt_phys_select(dde, bp);
4.570 + ddt_phys_decref(ddp);
4.571 + refcnt = ddp->ddp_refcnt;
4.572 + if (ddt_phys_total_refcnt(dde) == 0)
4.573 + ddt_remove(ddt, dde);
4.574 + }
4.575 + ddt_exit(ddt);
4.576 + }
4.577 +
4.578 + VERIFY3U(zio_wait(zio_claim(NULL, spa,
4.579 + refcnt ? 0 : spa_first_txg(spa),
4.580 + bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
4.581 +}
4.582 +
4.583 +static int
4.584 +zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
4.585 + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
4.586 +{
4.587 + zdb_cb_t *zcb = arg;
4.588 + char blkbuf[BP_SPRINTF_LEN];
4.589 + dmu_object_type_t type;
4.590 + boolean_t is_metadata;
4.591 +
4.592 + if (bp == NULL)
4.593 + return (0);
4.594 +
4.595 + type = BP_GET_TYPE(bp);
4.596 +
4.597 + zdb_count_block(spa, zilog, zcb, bp, type);
4.598 +
4.599 + is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
4.600 +
4.601 + if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
4.602 + int ioerr;
4.603 + size_t size = BP_GET_PSIZE(bp);
4.604 + void *data = malloc(size);
4.605 + int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
4.606 +
4.607 + /* If it's an intent log block, failure is expected. */
4.608 + if (zb->zb_level == ZB_ZIL_LEVEL)
4.609 + flags |= ZIO_FLAG_SPECULATIVE;
4.610 +
4.611 + ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
4.612 + NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
4.613 +
4.614 + free(data);
4.615 +
4.616 + if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
4.617 + zcb->zcb_haderrors = 1;
4.618 + zcb->zcb_errors[ioerr]++;
4.619 +
4.620 + if (dump_opt['b'] >= 2)
4.621 + sprintf_blkptr(blkbuf, bp);
4.622 + else
4.623 + blkbuf[0] = '\0';
4.624 +
4.625 + (void) printf("zdb_blkptr_cb: "
4.626 + "Got error %d reading "
4.627 + "<%llu, %llu, %lld, %llx> %s -- skipping\n",
4.628 + ioerr,
4.629 + (u_longlong_t)zb->zb_objset,
4.630 + (u_longlong_t)zb->zb_object,
4.631 + (u_longlong_t)zb->zb_level,
4.632 + (u_longlong_t)zb->zb_blkid,
4.633 + blkbuf);
4.634 + }
4.635 + }
4.636 +
4.637 + zcb->zcb_readfails = 0;
4.638 +
4.639 + if (dump_opt['b'] >= 4) {
4.640 + sprintf_blkptr(blkbuf, bp);
4.641 + (void) printf("objset %llu object %llu "
4.642 + "level %lld offset 0x%llx %s\n",
4.643 + (u_longlong_t)zb->zb_objset,
4.644 + (u_longlong_t)zb->zb_object,
4.645 + (longlong_t)zb->zb_level,
4.646 + (u_longlong_t)blkid2offset(dnp, bp, zb),
4.647 + blkbuf);
4.648 + }
4.649 +
4.650 return (0);
4.651 }
4.652
4.653 @@ -1512,169 +1865,90 @@
4.654 };
4.655
4.656 static void
4.657 -zdb_leak_init(spa_t *spa)
4.658 +zdb_ddt_leak_init(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
4.659 + zdb_cb_t *zcb)
4.660 {
4.661 - vdev_t *rvd = spa->spa_root_vdev;
4.662 + uint64_t walk = 0;
4.663 + ddt_entry_t dde;
4.664 + int error;
4.665
4.666 - for (int c = 0; c < rvd->vdev_children; c++) {
4.667 - vdev_t *vd = rvd->vdev_child[c];
4.668 - for (int m = 0; m < vd->vdev_ms_count; m++) {
4.669 - metaslab_t *msp = vd->vdev_ms[m];
4.670 - mutex_enter(&msp->ms_lock);
4.671 - VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
4.672 - SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
4.673 - msp->ms_map.sm_ppd = vd;
4.674 - mutex_exit(&msp->ms_lock);
4.675 + if (class == DDT_CLASS_UNIQUE || !ddt_object_exists(ddt, type, class))
4.676 + return;
4.677 +
4.678 + while ((error = ddt_object_walk(ddt, type, class, &dde, &walk)) == 0) {
4.679 + blkptr_t blk;
4.680 + ddt_phys_t *ddp = dde.dde_phys;
4.681 + ASSERT(ddt_phys_total_refcnt(&dde) > 1);
4.682 + for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
4.683 + if (ddp->ddp_phys_birth == 0)
4.684 + continue;
4.685 + ddt_bp_create(ddt, &dde.dde_key, ddp, &blk);
4.686 + if (p == DDT_PHYS_DITTO) {
4.687 + zdb_count_block(ddt->ddt_spa, NULL, zcb, &blk,
4.688 + ZDB_OT_DITTO);
4.689 + } else {
4.690 + zcb->zcb_dedup_asize +=
4.691 + BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
4.692 + zcb->zcb_dedup_blocks++;
4.693 + }
4.694 + }
4.695 + if (!dump_opt['L']) {
4.696 + ddt_enter(ddt);
4.697 + VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
4.698 + ddt_exit(ddt);
4.699 }
4.700 }
4.701 +
4.702 + ASSERT(error == ENOENT);
4.703 +}
4.704 +
4.705 +static void
4.706 +zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
4.707 +{
4.708 + if (!dump_opt['L']) {
4.709 + vdev_t *rvd = spa->spa_root_vdev;
4.710 + for (int c = 0; c < rvd->vdev_children; c++) {
4.711 + vdev_t *vd = rvd->vdev_child[c];
4.712 + for (int m = 0; m < vd->vdev_ms_count; m++) {
4.713 + metaslab_t *msp = vd->vdev_ms[m];
4.714 + mutex_enter(&msp->ms_lock);
4.715 + space_map_unload(&msp->ms_map);
4.716 + VERIFY(space_map_load(&msp->ms_map,
4.717 + &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo,
4.718 + spa->spa_meta_objset) == 0);
4.719 + msp->ms_map.sm_ppd = vd;
4.720 + mutex_exit(&msp->ms_lock);
4.721 + }
4.722 + }
4.723 + }
4.724 +
4.725 + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4.726 +
4.727 + for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
4.728 + for (enum ddt_type type = 0; type < DDT_TYPES; type++)
4.729 + for (enum ddt_class class = 0; class < DDT_CLASSES;
4.730 + class++)
4.731 + zdb_ddt_leak_init(spa->spa_ddt[c],
4.732 + type, class, zcb);
4.733 +
4.734 + spa_config_exit(spa, SCL_CONFIG, FTAG);
4.735 }
4.736
4.737 static void
4.738 zdb_leak_fini(spa_t *spa)
4.739 {
4.740 - vdev_t *rvd = spa->spa_root_vdev;
4.741 -
4.742 - for (int c = 0; c < rvd->vdev_children; c++) {
4.743 - vdev_t *vd = rvd->vdev_child[c];
4.744 - for (int m = 0; m < vd->vdev_ms_count; m++) {
4.745 - metaslab_t *msp = vd->vdev_ms[m];
4.746 - mutex_enter(&msp->ms_lock);
4.747 - space_map_unload(&msp->ms_map);
4.748 - mutex_exit(&msp->ms_lock);
4.749 - }
4.750 - }
4.751 -}
4.752 -
4.753 -/*
4.754 - * Verify that the sum of the sizes of all blocks in the pool adds up
4.755 - * to the SPA's sa_alloc total.
4.756 - */
4.757 -typedef struct zdb_blkstats {
4.758 - uint64_t zb_asize;
4.759 - uint64_t zb_lsize;
4.760 - uint64_t zb_psize;
4.761 - uint64_t zb_count;
4.762 -} zdb_blkstats_t;
4.763 -
4.764 -#define DMU_OT_DEFERRED DMU_OT_NONE
4.765 -#define DMU_OT_TOTAL DMU_OT_NUMTYPES
4.766 -
4.767 -#define ZB_TOTAL DN_MAX_LEVELS
4.768 -
4.769 -typedef struct zdb_cb {
4.770 - zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
4.771 - uint64_t zcb_errors[256];
4.772 - int zcb_readfails;
4.773 - int zcb_haderrors;
4.774 -} zdb_cb_t;
4.775 -
4.776 -static void
4.777 -zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
4.778 -{
4.779 - for (int i = 0; i < 4; i++) {
4.780 - int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
4.781 - int t = (i & 1) ? type : DMU_OT_TOTAL;
4.782 - zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
4.783 -
4.784 - zb->zb_asize += BP_GET_ASIZE(bp);
4.785 - zb->zb_lsize += BP_GET_LSIZE(bp);
4.786 - zb->zb_psize += BP_GET_PSIZE(bp);
4.787 - zb->zb_count++;
4.788 - }
4.789 -
4.790 - if (dump_opt['S']) {
4.791 - boolean_t print_sig;
4.792 -
4.793 - print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
4.794 - BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
4.795 -
4.796 - if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
4.797 - print_sig = B_FALSE;
4.798 -
4.799 - if (print_sig) {
4.800 - (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
4.801 - "%llx:%llx:%llx:%llx\n",
4.802 - (u_longlong_t)BP_GET_LEVEL(bp),
4.803 - (longlong_t)BP_GET_PSIZE(bp),
4.804 - (longlong_t)BP_GET_NDVAS(bp),
4.805 - ZDB_OT_NAME(BP_GET_TYPE(bp)),
4.806 - ZDB_CHECKSUM_NAME(BP_GET_CHECKSUM(bp)),
4.807 - ZDB_COMPRESS_NAME(BP_GET_COMPRESS(bp)),
4.808 - (u_longlong_t)bp->blk_cksum.zc_word[0],
4.809 - (u_longlong_t)bp->blk_cksum.zc_word[1],
4.810 - (u_longlong_t)bp->blk_cksum.zc_word[2],
4.811 - (u_longlong_t)bp->blk_cksum.zc_word[3]);
4.812 - }
4.813 - }
4.814 -
4.815 - if (!dump_opt['L'])
4.816 - VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
4.817 - NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
4.818 -}
4.819 -
4.820 -static int
4.821 -zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
4.822 - const dnode_phys_t *dnp, void *arg)
4.823 -{
4.824 - zdb_cb_t *zcb = arg;
4.825 - char blkbuf[BP_SPRINTF_LEN];
4.826 - dmu_object_type_t type;
4.827 - boolean_t is_metadata;
4.828 -
4.829 - if (bp == NULL)
4.830 - return (0);
4.831 -
4.832 - type = BP_GET_TYPE(bp);
4.833 -
4.834 - zdb_count_block(spa, zcb, bp, type);
4.835 -
4.836 - is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
4.837 -
4.838 - if (dump_opt['c'] > 1 || dump_opt['S'] ||
4.839 - (dump_opt['c'] && is_metadata)) {
4.840 - size_t size = BP_GET_PSIZE(bp);
4.841 - void *data = malloc(size);
4.842 - int ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
4.843 - NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
4.844 - ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
4.845 - free(data);
4.846 -
4.847 - /* We expect io errors on intent log */
4.848 - if (ioerr && type != DMU_OT_INTENT_LOG) {
4.849 - zcb->zcb_haderrors = 1;
4.850 - zcb->zcb_errors[ioerr]++;
4.851 -
4.852 - if (dump_opt['b'] >= 2)
4.853 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
4.854 - else
4.855 - blkbuf[0] = '\0';
4.856 -
4.857 - if (!dump_opt['S']) {
4.858 - (void) printf("zdb_blkptr_cb: "
4.859 - "Got error %d reading "
4.860 - "<%llu, %llu, %lld, %llx> %s -- skipping\n",
4.861 - ioerr,
4.862 - (u_longlong_t)zb->zb_objset,
4.863 - (u_longlong_t)zb->zb_object,
4.864 - (u_longlong_t)zb->zb_level,
4.865 - (u_longlong_t)zb->zb_blkid,
4.866 - blkbuf);
4.867 + if (!dump_opt['L']) {
4.868 + vdev_t *rvd = spa->spa_root_vdev;
4.869 + for (int c = 0; c < rvd->vdev_children; c++) {
4.870 + vdev_t *vd = rvd->vdev_child[c];
4.871 + for (int m = 0; m < vd->vdev_ms_count; m++) {
4.872 + metaslab_t *msp = vd->vdev_ms[m];
4.873 + mutex_enter(&msp->ms_lock);
4.874 + space_map_unload(&msp->ms_map);
4.875 + mutex_exit(&msp->ms_lock);
4.876 }
4.877 }
4.878 }
4.879 -
4.880 - zcb->zcb_readfails = 0;
4.881 -
4.882 - if (dump_opt['b'] >= 4) {
4.883 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
4.884 - (void) printf("objset %llu object %llu offset 0x%llx %s\n",
4.885 - (u_longlong_t)zb->zb_objset,
4.886 - (u_longlong_t)zb->zb_object,
4.887 - (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
4.888 - blkbuf);
4.889 - }
4.890 -
4.891 - return (0);
4.892 }
4.893
4.894 static int
4.895 @@ -1682,19 +1956,15 @@
4.896 {
4.897 zdb_cb_t zcb = { 0 };
4.898 zdb_blkstats_t *zb, *tzb;
4.899 - uint64_t alloc, space, logalloc;
4.900 - vdev_t *rvd = spa->spa_root_vdev;
4.901 + uint64_t norm_alloc, norm_space, total_alloc, total_found;
4.902 int leaks = 0;
4.903 - int c, e;
4.904
4.905 - if (!dump_opt['S']) {
4.906 - (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
4.907 - (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
4.908 - (dump_opt['c'] == 1) ? "metadata " : "",
4.909 - dump_opt['c'] ? "checksums " : "",
4.910 - (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
4.911 - !dump_opt['L'] ? "nothing leaked " : "");
4.912 - }
4.913 + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
4.914 + (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
4.915 + (dump_opt['c'] == 1) ? "metadata " : "",
4.916 + dump_opt['c'] ? "checksums " : "",
4.917 + (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
4.918 + !dump_opt['L'] ? "nothing leaked " : "");
4.919
4.920 /*
4.921 * Load all space maps as SM_ALLOC maps, then traverse the pool
4.922 @@ -1704,28 +1974,27 @@
4.923 * it's not part of any space map) is a double allocation,
4.924 * reference to a freed block, or an unclaimed log block.
4.925 */
4.926 - if (!dump_opt['L'])
4.927 - zdb_leak_init(spa);
4.928 + zdb_leak_init(spa, &zcb);
4.929
4.930 /*
4.931 * If there's a deferred-free bplist, process that first.
4.932 */
4.933 - if (spa->spa_sync_bplist_obj != 0) {
4.934 - bplist_t *bpl = &spa->spa_sync_bplist;
4.935 + if (spa->spa_deferred_bplist_obj != 0) {
4.936 + bplist_t *bpl = &spa->spa_deferred_bplist;
4.937 blkptr_t blk;
4.938 uint64_t itor = 0;
4.939
4.940 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
4.941 - spa->spa_sync_bplist_obj));
4.942 + spa->spa_deferred_bplist_obj));
4.943
4.944 while (bplist_iterate(bpl, &itor, &blk) == 0) {
4.945 if (dump_opt['b'] >= 4) {
4.946 char blkbuf[BP_SPRINTF_LEN];
4.947 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
4.948 + sprintf_blkptr(blkbuf, &blk);
4.949 (void) printf("[%s] %s\n",
4.950 "deferred free", blkbuf);
4.951 }
4.952 - zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
4.953 + zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED);
4.954 }
4.955
4.956 bplist_close(bpl);
4.957 @@ -1733,10 +2002,10 @@
4.958
4.959 zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb, 0);
4.960
4.961 - if (zcb.zcb_haderrors && !dump_opt['S']) {
4.962 + if (zcb.zcb_haderrors) {
4.963 (void) printf("\nError counts:\n\n");
4.964 (void) printf("\t%5s %s\n", "errno", "count");
4.965 - for (e = 0; e < 256; e++) {
4.966 + for (int e = 0; e < 256; e++) {
4.967 if (zcb.zcb_errors[e] != 0) {
4.968 (void) printf("\t%5d %llu\n",
4.969 e, (u_longlong_t)zcb.zcb_errors[e]);
4.970 @@ -1747,43 +2016,27 @@
4.971 /*
4.972 * Report any leaked segments.
4.973 */
4.974 - if (!dump_opt['L'])
4.975 - zdb_leak_fini(spa);
4.976 + zdb_leak_fini(spa);
4.977
4.978 - /*
4.979 - * If we're interested in printing out the blkptr signatures,
4.980 - * return now as we don't print out anything else (including
4.981 - * errors and leaks).
4.982 - */
4.983 - if (dump_opt['S'])
4.984 - return (zcb.zcb_haderrors ? 3 : 0);
4.985 + tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
4.986
4.987 - alloc = spa_get_alloc(spa);
4.988 - space = spa_get_space(spa);
4.989 + norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
4.990 + norm_space = metaslab_class_get_space(spa_normal_class(spa));
4.991
4.992 - /*
4.993 - * Log blocks allocated from a separate log device don't count
4.994 - * as part of the normal pool space; factor them in here.
4.995 - */
4.996 - logalloc = 0;
4.997 + total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
4.998 + total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
4.999
4.1000 - for (c = 0; c < rvd->vdev_children; c++)
4.1001 - if (rvd->vdev_child[c]->vdev_islog)
4.1002 - logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
4.1003 -
4.1004 - tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
4.1005 -
4.1006 - if (tzb->zb_asize == alloc + logalloc) {
4.1007 + if (total_found == total_alloc) {
4.1008 if (!dump_opt['L'])
4.1009 (void) printf("\n\tNo leaks (block sum matches space"
4.1010 " maps exactly)\n");
4.1011 } else {
4.1012 (void) printf("block traversal size %llu != alloc %llu "
4.1013 "(%s %lld)\n",
4.1014 - (u_longlong_t)tzb->zb_asize,
4.1015 - (u_longlong_t)alloc + logalloc,
4.1016 + (u_longlong_t)total_found,
4.1017 + (u_longlong_t)total_alloc,
4.1018 (dump_opt['L']) ? "unreachable" : "leaked",
4.1019 - (longlong_t)(alloc + logalloc - tzb->zb_asize));
4.1020 + (longlong_t)(total_alloc - total_found));
4.1021 leaks = 1;
4.1022 }
4.1023
4.1024 @@ -1793,33 +2046,40 @@
4.1025 (void) printf("\n");
4.1026 (void) printf("\tbp count: %10llu\n",
4.1027 (u_longlong_t)tzb->zb_count);
4.1028 - (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
4.1029 + (void) printf("\tbp logical: %10llu avg: %6llu\n",
4.1030 (u_longlong_t)tzb->zb_lsize,
4.1031 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
4.1032 - (void) printf("\tbp physical: %10llu\t avg:"
4.1033 - " %6llu\tcompression: %6.2f\n",
4.1034 + (void) printf("\tbp physical: %10llu avg:"
4.1035 + " %6llu compression: %6.2f\n",
4.1036 (u_longlong_t)tzb->zb_psize,
4.1037 (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
4.1038 (double)tzb->zb_lsize / tzb->zb_psize);
4.1039 - (void) printf("\tbp allocated: %10llu\t avg:"
4.1040 - " %6llu\tcompression: %6.2f\n",
4.1041 + (void) printf("\tbp allocated: %10llu avg:"
4.1042 + " %6llu compression: %6.2f\n",
4.1043 (u_longlong_t)tzb->zb_asize,
4.1044 (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
4.1045 (double)tzb->zb_lsize / tzb->zb_asize);
4.1046 - (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
4.1047 - (u_longlong_t)alloc, 100.0 * alloc / space);
4.1048 + (void) printf("\tbp deduped: %10llu ref>1:"
4.1049 + " %6llu deduplication: %6.2f\n",
4.1050 + (u_longlong_t)zcb.zcb_dedup_asize,
4.1051 + (u_longlong_t)zcb.zcb_dedup_blocks,
4.1052 + (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
4.1053 + (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
4.1054 + (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
4.1055
4.1056 if (dump_opt['b'] >= 2) {
4.1057 int l, t, level;
4.1058 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
4.1059 "\t avg\t comp\t%%Total\tType\n");
4.1060
4.1061 - for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
4.1062 + for (t = 0; t <= ZDB_OT_TOTAL; t++) {
4.1063 char csize[6], lsize[6], psize[6], asize[6], avg[6];
4.1064 char *typename;
4.1065
4.1066 - typename = t == DMU_OT_DEFERRED ? "deferred free" :
4.1067 - t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
4.1068 + if (t < DMU_OT_NUMTYPES)
4.1069 + typename = dmu_ot[t].ot_name;
4.1070 + else
4.1071 + typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
4.1072
4.1073 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
4.1074 (void) printf("%6s\t%5s\t%5s\t%5s"
4.1075 @@ -1881,11 +2141,115 @@
4.1076 return (0);
4.1077 }
4.1078
4.1079 +typedef struct zdb_ddt_entry {
4.1080 + ddt_key_t zdde_key;
4.1081 + uint64_t zdde_ref_blocks;
4.1082 + uint64_t zdde_ref_lsize;
4.1083 + uint64_t zdde_ref_psize;
4.1084 + uint64_t zdde_ref_dsize;
4.1085 + avl_node_t zdde_node;
4.1086 +} zdb_ddt_entry_t;
4.1087 +
4.1088 +/* ARGSUSED */
4.1089 +static int
4.1090 +zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
4.1091 + const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
4.1092 +{
4.1093 + avl_tree_t *t = arg;
4.1094 + avl_index_t where;
4.1095 + zdb_ddt_entry_t *zdde, zdde_search;
4.1096 +
4.1097 + if (bp == NULL)
4.1098 + return (0);
4.1099 +
4.1100 + if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
4.1101 + (void) printf("traversing objset %llu, %llu objects, "
4.1102 + "%lu blocks so far\n",
4.1103 + (u_longlong_t)zb->zb_objset,
4.1104 + (u_longlong_t)bp->blk_fill,
4.1105 + avl_numnodes(t));
4.1106 + }
4.1107 +
4.1108 + if (BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
4.1109 + return (0);
4.1110 +
4.1111 + ddt_key_fill(&zdde_search.zdde_key, bp);
4.1112 +
4.1113 + zdde = avl_find(t, &zdde_search, &where);
4.1114 +
4.1115 + if (zdde == NULL) {
4.1116 + zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
4.1117 + zdde->zdde_key = zdde_search.zdde_key;
4.1118 + avl_insert(t, zdde, where);
4.1119 + }
4.1120 +
4.1121 + zdde->zdde_ref_blocks += 1;
4.1122 + zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
4.1123 + zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
4.1124 + zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
4.1125 +
4.1126 + return (0);
4.1127 +}
4.1128 +
4.1129 +static void
4.1130 +dump_simulated_ddt(spa_t *spa)
4.1131 +{
4.1132 + avl_tree_t t;
4.1133 + void *cookie = NULL;
4.1134 + zdb_ddt_entry_t *zdde;
4.1135 + ddt_histogram_t ddh_total = { 0 };
4.1136 + ddt_stat_t dds_total = { 0 };
4.1137 +
4.1138 + avl_create(&t, ddt_entry_compare,
4.1139 + sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
4.1140 +
4.1141 + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4.1142 +
4.1143 + (void) traverse_pool(spa, zdb_ddt_add_cb, &t, 0);
4.1144 +
4.1145 + spa_config_exit(spa, SCL_CONFIG, FTAG);
4.1146 +
4.1147 + while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
4.1148 + ddt_stat_t dds;
4.1149 + uint64_t refcnt = zdde->zdde_ref_blocks;
4.1150 + ASSERT(refcnt != 0);
4.1151 +
4.1152 + dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
4.1153 + dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
4.1154 + dds.dds_psize = zdde->zdde_ref_psize / refcnt;
4.1155 + dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
4.1156 +
4.1157 + dds.dds_ref_blocks = zdde->zdde_ref_blocks;
4.1158 + dds.dds_ref_lsize = zdde->zdde_ref_lsize;
4.1159 + dds.dds_ref_psize = zdde->zdde_ref_psize;
4.1160 + dds.dds_ref_dsize = zdde->zdde_ref_dsize;
4.1161 +
4.1162 + ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
4.1163 +
4.1164 + umem_free(zdde, sizeof (*zdde));
4.1165 + }
4.1166 +
4.1167 + avl_destroy(&t);
4.1168 +
4.1169 + ddt_histogram_stat(&dds_total, &ddh_total);
4.1170 +
4.1171 + (void) printf("Simulated DDT histogram:\n");
4.1172 +
4.1173 + dump_ddt_histogram(&ddh_total);
4.1174 +
4.1175 + dump_dedup_ratio(&dds_total);
4.1176 +}
4.1177 +
4.1178 static void
4.1179 dump_zpool(spa_t *spa)
4.1180 {
4.1181 dsl_pool_t *dp = spa_get_dsl(spa);
4.1182 int rc = 0;
4.1183 +
4.1184 + if (dump_opt['S']) {
4.1185 + dump_simulated_ddt(spa);
4.1186 + return;
4.1187 + }
4.1188
4.1189 if (!dump_opt['e'] && dump_opt['C'] > 1) {
4.1190 (void) printf("\nCached configuration:\n");
4.1191 @@ -1898,6 +2262,9 @@
4.1192 if (dump_opt['u'])
4.1193 dump_uberblock(&spa->spa_uberblock);
4.1194
4.1195 + if (dump_opt['D'])
4.1196 + dump_all_ddts(spa);
4.1197 +
4.1198 if (dump_opt['d'] > 2 || dump_opt['m'])
4.1199 dump_metaslabs(spa);
4.1200
4.1201 @@ -1905,13 +2272,13 @@
4.1202 dump_dir(dp->dp_meta_objset);
4.1203 if (dump_opt['d'] >= 3) {
4.1204 dump_bplist(dp->dp_meta_objset,
4.1205 - spa->spa_sync_bplist_obj, "Deferred frees");
4.1206 + spa->spa_deferred_bplist_obj, "Deferred frees");
4.1207 dump_dtl(spa->spa_root_vdev, 0);
4.1208 }
4.1209 (void) dmu_objset_find(spa_name(spa), dump_one_dir,
4.1210 NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
4.1211 }
4.1212 - if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
4.1213 + if (dump_opt['b'] || dump_opt['c'])
4.1214 rc = dump_block_stats(spa);
4.1215
4.1216 if (dump_opt['s'])
4.1217 @@ -1938,51 +2305,13 @@
4.1218 static void
4.1219 zdb_print_blkptr(blkptr_t *bp, int flags)
4.1220 {
4.1221 - dva_t *dva = bp->blk_dva;
4.1222 - int d;
4.1223 + char blkbuf[BP_SPRINTF_LEN];
4.1224
4.1225 if (flags & ZDB_FLAG_BSWAP)
4.1226 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
4.1227 - /*
4.1228 - * Super-ick warning: This code is also duplicated in
4.1229 - * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
4.1230 - * replication, too.
4.1231 - */
4.1232 - for (d = 0; d < BP_GET_NDVAS(bp); d++) {
4.1233 - (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
4.1234 - (longlong_t)DVA_GET_VDEV(&dva[d]),
4.1235 - (longlong_t)DVA_GET_OFFSET(&dva[d]));
4.1236 - (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
4.1237 - "ASIZE: %llx\n", d,
4.1238 - DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
4.1239 - (longlong_t)DVA_GET_GRID(&dva[d]),
4.1240 - (longlong_t)DVA_GET_ASIZE(&dva[d]));
4.1241 - (void) printf("\tDVA[%d]: %llu:%llx:%llx:%s%s%s%s\n", d,
4.1242 - (u_longlong_t)DVA_GET_VDEV(&dva[d]),
4.1243 - (longlong_t)DVA_GET_OFFSET(&dva[d]),
4.1244 - (longlong_t)BP_GET_PSIZE(bp),
4.1245 - BP_SHOULD_BYTESWAP(bp) ? "e" : "",
4.1246 - !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
4.1247 - "d" : "",
4.1248 - DVA_GET_GANG(&dva[d]) ? "g" : "",
4.1249 - BP_GET_COMPRESS(bp) != 0 ? "d" : "");
4.1250 - }
4.1251 - (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
4.1252 - (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
4.1253 - (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
4.1254 - BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
4.1255 - ZDB_OT_NAME(BP_GET_TYPE(bp)));
4.1256 - (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
4.1257 - (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
4.1258 - (u_longlong_t)bp->blk_fill);
4.1259 - (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
4.1260 - ZDB_CHECKSUM_NAME(BP_GET_CHECKSUM(bp)),
4.1261 - ZDB_COMPRESS_NAME(BP_GET_COMPRESS(bp)));
4.1262 - (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
4.1263 - (u_longlong_t)bp->blk_cksum.zc_word[0],
4.1264 - (u_longlong_t)bp->blk_cksum.zc_word[1],
4.1265 - (u_longlong_t)bp->blk_cksum.zc_word[2],
4.1266 - (u_longlong_t)bp->blk_cksum.zc_word[3]);
4.1267 +
4.1268 + sprintf_blkptr(blkbuf, bp);
4.1269 + (void) printf("%s\n", blkbuf);
4.1270 }
4.1271
4.1272 static void
4.1273 @@ -2005,7 +2334,7 @@
4.1274 {
4.1275 if (flags & ZDB_FLAG_BSWAP)
4.1276 byteswap_uint64_array(buf, size);
4.1277 - (void) write(2, buf, size);
4.1278 + (void) write(1, buf, size);
4.1279 }
4.1280
4.1281 static void
4.1282 @@ -2108,10 +2437,10 @@
4.1283 * flags - A string of characters specifying options
4.1284 * b: Decode a blkptr at given offset within block
4.1285 * *c: Calculate and display checksums
4.1286 - * *d: Decompress data before dumping
4.1287 + * d: Decompress data before dumping
4.1288 * e: Byteswap data before dumping
4.1289 - * *g: Display data as a gang block header
4.1290 - * *i: Display as an indirect block
4.1291 + * g: Display data as a gang block header
4.1292 + * i: Display as an indirect block
4.1293 * p: Do I/O to physical offset
4.1294 * r: Dump raw data to stdout
4.1295 *
4.1296 @@ -2120,13 +2449,15 @@
4.1297 static void
4.1298 zdb_read_block(char *thing, spa_t *spa)
4.1299 {
4.1300 + blkptr_t blk, *bp = &blk;
4.1301 + dva_t *dva = bp->blk_dva;
4.1302 int flags = 0;
4.1303 - uint64_t offset = 0, size = 0, blkptr_offset = 0;
4.1304 + uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
4.1305 zio_t *zio;
4.1306 vdev_t *vd;
4.1307 - void *buf;
4.1308 + void *pbuf, *lbuf, *buf;
4.1309 char *s, *p, *dup, *vdev, *flagstr;
4.1310 - int i, error, zio_flags;
4.1311 + int i, error;
4.1312
4.1313 dup = strdup(thing);
4.1314 s = strtok(dup, ":");
4.1315 @@ -2163,7 +2494,7 @@
4.1316 flags |= bit;
4.1317
4.1318 /* If it's not something with an argument, keep going */
4.1319 - if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
4.1320 + if ((bit & (ZDB_FLAG_CHECKSUM |
4.1321 ZDB_FLAG_PRINT_BLKPTR)) == 0)
4.1322 continue;
4.1323
4.1324 @@ -2185,28 +2516,110 @@
4.1325 return;
4.1326 } else {
4.1327 if (vd->vdev_path)
4.1328 - (void) printf("Found vdev: %s\n", vd->vdev_path);
4.1329 + (void) fprintf(stderr, "Found vdev: %s\n",
4.1330 + vd->vdev_path);
4.1331 else
4.1332 - (void) printf("Found vdev type: %s\n",
4.1333 + (void) fprintf(stderr, "Found vdev type: %s\n",
4.1334 vd->vdev_ops->vdev_op_type);
4.1335 }
4.1336
4.1337 - buf = umem_alloc(size, UMEM_NOFAIL);
4.1338 + psize = size;
4.1339 + lsize = size;
4.1340
4.1341 - zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
4.1342 - ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
4.1343 + pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4.1344 + lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4.1345 +
4.1346 + BP_ZERO(bp);
4.1347 +
4.1348 + DVA_SET_VDEV(&dva[0], vd->vdev_id);
4.1349 + DVA_SET_OFFSET(&dva[0], offset);
4.1350 + DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
4.1351 + DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
4.1352 +
4.1353 + BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
4.1354 +
4.1355 + BP_SET_LSIZE(bp, lsize);
4.1356 + BP_SET_PSIZE(bp, psize);
4.1357 + BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
4.1358 + BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
4.1359 + BP_SET_TYPE(bp, DMU_OT_NONE);
4.1360 + BP_SET_LEVEL(bp, 0);
4.1361 + BP_SET_DEDUP(bp, 0);
4.1362 + BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
4.1363
4.1364 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4.1365 zio = zio_root(spa, NULL, NULL, 0);
4.1366 - /* XXX todo - cons up a BP so RAID-Z will be happy */
4.1367 - zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
4.1368 - ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
4.1369 +
4.1370 + if (vd == vd->vdev_top) {
4.1371 + /*
4.1372 + * Treat this as a normal block read.
4.1373 + */
4.1374 + zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
4.1375 + ZIO_PRIORITY_SYNC_READ,
4.1376 + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
4.1377 + } else {
4.1378 + /*
4.1379 + * Treat this as a vdev child I/O.
4.1380 + */
4.1381 + zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
4.1382 + ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
4.1383 + ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
4.1384 + ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
4.1385 + ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
4.1386 + }
4.1387 +
4.1388 error = zio_wait(zio);
4.1389 spa_config_exit(spa, SCL_STATE, FTAG);
4.1390
4.1391 if (error) {
4.1392 (void) printf("Read of %s failed, error: %d\n", thing, error);
4.1393 goto out;
4.1394 + }
4.1395 +
4.1396 + if (flags & ZDB_FLAG_DECOMPRESS) {
4.1397 + /*
4.1398 + * We don't know how the data was compressed, so just try
4.1399 + * every decompress function at every inflated blocksize.
4.1400 + */
4.1401 + enum zio_compress c;
4.1402 + void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4.1403 + void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
4.1404 +
4.1405 + bcopy(pbuf, pbuf2, psize);
4.1406 +
4.1407 + VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize,
4.1408 + SPA_MAXBLOCKSIZE - psize) == 0);
4.1409 +
4.1410 + VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
4.1411 + SPA_MAXBLOCKSIZE - psize) == 0);
4.1412 +
4.1413 + for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
4.1414 + lsize -= SPA_MINBLOCKSIZE) {
4.1415 + for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
4.1416 + if (zio_decompress_data(c, pbuf, lbuf,
4.1417 + psize, lsize) == 0 &&
4.1418 + zio_decompress_data(c, pbuf2, lbuf2,
4.1419 + psize, lsize) == 0 &&
4.1420 + bcmp(lbuf, lbuf2, lsize) == 0)
4.1421 + break;
4.1422 + }
4.1423 + if (c != ZIO_COMPRESS_FUNCTIONS)
4.1424 + break;
4.1425 + /* lsize is stepped by the for-increment only */
4.1426 + }
4.1427 +
4.1428 + umem_free(pbuf2, SPA_MAXBLOCKSIZE);
4.1429 + umem_free(lbuf2, SPA_MAXBLOCKSIZE);
4.1430 +
4.1431 + if (lsize <= psize) {
4.1432 + (void) printf("Decompress of %s failed\n", thing);
4.1433 + goto out;
4.1434 + }
4.1435 + buf = lbuf;
4.1436 + size = lsize;
4.1437 + } else {
4.1438 + buf = pbuf;
4.1439 + size = psize;
4.1440 }
4.1441
4.1442 if (flags & ZDB_FLAG_PRINT_BLKPTR)
4.1443 @@ -2223,7 +2636,8 @@
4.1444 zdb_dump_block(thing, buf, size, flags);
4.1445
4.1446 out:
4.1447 - umem_free(buf, size);
4.1448 + umem_free(pbuf, SPA_MAXBLOCKSIZE);
4.1449 + umem_free(lbuf, SPA_MAXBLOCKSIZE);
4.1450 free(dup);
4.1451 }
4.1452
4.1453 @@ -2312,7 +2726,6 @@
4.1454 struct rlimit rl = { 1024, 1024 };
4.1455 spa_t *spa = NULL;
4.1456 objset_t *os = NULL;
4.1457 - char *endstr;
4.1458 int dump_all = 1;
4.1459 int verbose = 0;
4.1460 int error;
4.1461 @@ -2327,19 +2740,21 @@
4.1462
4.1463 dprintf_setup(&argc, argv);
4.1464
4.1465 - while ((c = getopt(argc, argv, "udhibcmsvCLS:RU:lep:t:")) != -1) {
4.1466 + while ((c = getopt(argc, argv, "bcdhilmsuCDRSLevp:t:U:")) != -1) {
4.1467 switch (c) {
4.1468 - case 'u':
4.1469 - case 'd':
4.1470 - case 'i':
4.1471 - case 'h':
4.1472 case 'b':
4.1473 case 'c':
4.1474 + case 'd':
4.1475 + case 'h':
4.1476 + case 'i':
4.1477 + case 'l':
4.1478 case 'm':
4.1479 case 's':
4.1480 + case 'u':
4.1481 case 'C':
4.1482 - case 'l':
4.1483 + case 'D':
4.1484 case 'R':
4.1485 + case 'S':
4.1486 dump_opt[c]++;
4.1487 dump_all = 0;
4.1488 break;
4.1489 @@ -2349,9 +2764,6 @@
4.1490 break;
4.1491 case 'v':
4.1492 verbose++;
4.1493 - break;
4.1494 - case 'U':
4.1495 - spa_config_path = optarg;
4.1496 break;
4.1497 case 'p':
4.1498 if (searchdirs == NULL) {
4.1499 @@ -2368,24 +2780,6 @@
4.1500 }
4.1501 searchdirs[nsearch++] = optarg;
4.1502 break;
4.1503 - case 'S':
4.1504 - dump_opt[c]++;
4.1505 - dump_all = 0;
4.1506 - zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
4.1507 - if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
4.1508 - usage();
4.1509 - endstr = strchr(optarg, ':') + 1;
4.1510 - if (strcmp(endstr, "fletcher2") == 0)
4.1511 - zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
4.1512 - else if (strcmp(endstr, "fletcher4") == 0)
4.1513 - zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
4.1514 - else if (strcmp(endstr, "sha256") == 0)
4.1515 - zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
4.1516 - else if (strcmp(endstr, "all") == 0)
4.1517 - zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
4.1518 - else
4.1519 - usage();
4.1520 - break;
4.1521 case 't':
4.1522 max_txg = strtoull(optarg, NULL, 0);
4.1523 if (max_txg < TXG_INITIAL) {
4.1524 @@ -2393,6 +2787,9 @@
4.1525 "specified: %s\n", optarg);
4.1526 usage();
4.1527 }
4.1528 + break;
4.1529 + case 'U':
4.1530 + spa_config_path = optarg;
4.1531 break;
4.1532 default:
4.1533 usage();
4.1534 @@ -2409,8 +2806,11 @@
4.1535 g_zfs = libzfs_init();
4.1536 ASSERT(g_zfs != NULL);
4.1537
4.1538 + if (dump_all)
4.1539 + verbose = MAX(verbose, 1);
4.1540 +
4.1541 for (c = 0; c < 256; c++) {
4.1542 - if (dump_all && !strchr("elLR", c))
4.1543 + if (dump_all && !strchr("elLRS", c))
4.1544 dump_opt[c] = 1;
4.1545 if (dump_opt[c])
4.1546 dump_opt[c] += verbose;
5.1 --- a/usr/src/cmd/zdb/zdb_il.c Fri Oct 30 18:47:17 2009 -0600
5.2 +++ b/usr/src/cmd/zdb/zdb_il.c Sun Nov 01 14:14:46 2009 -0800
5.3 @@ -40,12 +40,14 @@
5.4
5.5 extern uint8_t dump_opt[256];
5.6
5.7 +static char prefix[4] = "\t\t\t";
5.8 +
5.9 static void
5.10 print_log_bp(const blkptr_t *bp, const char *prefix)
5.11 {
5.12 char blkbuf[BP_SPRINTF_LEN];
5.13
5.14 - sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
5.15 + sprintf_blkptr(blkbuf, bp);
5.16 (void) printf("%s%s\n", prefix, blkbuf);
5.17 }
5.18
5.19 @@ -58,15 +60,15 @@
5.20 char *link = name + strlen(name) + 1;
5.21
5.22 if (txtype == TX_SYMLINK)
5.23 - (void) printf("\t\t\t%s -> %s\n", name, link);
5.24 + (void) printf("%s%s -> %s\n", prefix, name, link);
5.25 else
5.26 - (void) printf("\t\t\t%s\n", name);
5.27 + (void) printf("%s%s\n", prefix, name);
5.28
5.29 - (void) printf("\t\t\t%s", ctime(&crtime));
5.30 - (void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
5.31 + (void) printf("%s%s", prefix, ctime(&crtime));
5.32 + (void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
5.33 (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
5.34 (longlong_t)lr->lr_mode);
5.35 - (void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
5.36 + (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
5.37 (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
5.38 (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
5.39 }
5.40 @@ -75,7 +77,7 @@
5.41 static void
5.42 zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
5.43 {
5.44 - (void) printf("\t\t\tdoid %llu, name %s\n",
5.45 + (void) printf("%sdoid %llu, name %s\n", prefix,
5.46 (u_longlong_t)lr->lr_doid, (char *)(lr + 1));
5.47 }
5.48
5.49 @@ -83,7 +85,7 @@
5.50 static void
5.51 zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
5.52 {
5.53 - (void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
5.54 + (void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
5.55 (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
5.56 (char *)(lr + 1));
5.57 }
5.58 @@ -95,9 +97,9 @@
5.59 char *snm = (char *)(lr + 1);
5.60 char *tnm = snm + strlen(snm) + 1;
5.61
5.62 - (void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
5.63 + (void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
5.64 (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
5.65 - (void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
5.66 + (void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
5.67 }
5.68
5.69 /* ARGSUSED */
5.70 @@ -106,44 +108,48 @@
5.71 {
5.72 char *data, *dlimit;
5.73 blkptr_t *bp = &lr->lr_blkptr;
5.74 + zbookmark_t zb;
5.75 char buf[SPA_MAXBLOCKSIZE];
5.76 int verbose = MAX(dump_opt['d'], dump_opt['i']);
5.77 int error;
5.78
5.79 - (void) printf("\t\t\tfoid %llu, offset 0x%llx,"
5.80 - " length 0x%llx, blkoff 0x%llx\n",
5.81 - (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
5.82 - (u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
5.83 + (void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
5.84 + (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
5.85 + (u_longlong_t)lr->lr_length);
5.86
5.87 if (txtype == TX_WRITE2 || verbose < 5)
5.88 return;
5.89
5.90 if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
5.91 - (void) printf("\t\t\thas blkptr, %s\n",
5.92 + (void) printf("%shas blkptr, %s\n", prefix,
5.93 bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
5.94 "will claim" : "won't claim");
5.95 - print_log_bp(bp, "\t\t\t");
5.96 + print_log_bp(bp, prefix);
5.97 +
5.98 if (BP_IS_HOLE(bp)) {
5.99 (void) printf("\t\t\tLSIZE 0x%llx\n",
5.100 (u_longlong_t)BP_GET_LSIZE(bp));
5.101 }
5.102 if (bp->blk_birth == 0) {
5.103 bzero(buf, sizeof (buf));
5.104 - } else {
5.105 - zbookmark_t zb;
5.106 + (void) printf("%s<hole>\n", prefix);
5.107 + return;
5.108 + }
5.109 + if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
5.110 + (void) printf("%s<block already committed>\n", prefix);
5.111 + return;
5.112 + }
5.113
5.114 - zb.zb_objset = dmu_objset_id(zilog->zl_os);
5.115 - zb.zb_object = lr->lr_foid;
5.116 - zb.zb_level = 0;
5.117 - zb.zb_blkid = -1; /* unknown */
5.118 + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
5.119 + lr->lr_foid, ZB_ZIL_LEVEL,
5.120 + lr->lr_offset / BP_GET_LSIZE(bp));
5.121
5.122 - error = zio_wait(zio_read(NULL, zilog->zl_spa,
5.123 - bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
5.124 - ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
5.125 - if (error)
5.126 - return;
5.127 - }
5.128 - data = buf + lr->lr_blkoff;
5.129 + error = zio_wait(zio_read(NULL, zilog->zl_spa,
5.130 + bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
5.131 + ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
5.132 + if (error)
5.133 + return;
5.134 + data = buf;
5.135 } else {
5.136 data = (char *)(lr + 1);
5.137 }
5.138 @@ -151,7 +157,7 @@
5.139 dlimit = data + MIN(lr->lr_length,
5.140 (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
5.141
5.142 - (void) printf("\t\t\t");
5.143 + (void) printf("%s", prefix);
5.144 while (data < dlimit) {
5.145 if (isprint(*data))
5.146 (void) printf("%c ", *data);
5.147 @@ -166,7 +172,7 @@
5.148 static void
5.149 zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
5.150 {
5.151 - (void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
5.152 + (void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
5.153 (u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
5.154 (u_longlong_t)lr->lr_length);
5.155 }
5.156 @@ -178,38 +184,38 @@
5.157 time_t atime = (time_t)lr->lr_atime[0];
5.158 time_t mtime = (time_t)lr->lr_mtime[0];
5.159
5.160 - (void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
5.161 + (void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
5.162 (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
5.163
5.164 if (lr->lr_mask & AT_MODE) {
5.165 - (void) printf("\t\t\tAT_MODE %llo\n",
5.166 + (void) printf("%sAT_MODE %llo\n", prefix,
5.167 (longlong_t)lr->lr_mode);
5.168 }
5.169
5.170 if (lr->lr_mask & AT_UID) {
5.171 - (void) printf("\t\t\tAT_UID %llu\n",
5.172 + (void) printf("%sAT_UID %llu\n", prefix,
5.173 (u_longlong_t)lr->lr_uid);
5.174 }
5.175
5.176 if (lr->lr_mask & AT_GID) {
5.177 - (void) printf("\t\t\tAT_GID %llu\n",
5.178 + (void) printf("%sAT_GID %llu\n", prefix,
5.179 (u_longlong_t)lr->lr_gid);
5.180 }
5.181
5.182 if (lr->lr_mask & AT_SIZE) {
5.183 - (void) printf("\t\t\tAT_SIZE %llu\n",
5.184 + (void) printf("%sAT_SIZE %llu\n", prefix,
5.185 (u_longlong_t)lr->lr_size);
5.186 }
5.187
5.188 if (lr->lr_mask & AT_ATIME) {
5.189 - (void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
5.190 + (void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
5.191 (u_longlong_t)lr->lr_atime[0],
5.192 (u_longlong_t)lr->lr_atime[1],
5.193 ctime(&atime));
5.194 }
5.195
5.196 if (lr->lr_mask & AT_MTIME) {
5.197 - (void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
5.198 + (void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
5.199 (u_longlong_t)lr->lr_mtime[0],
5.200 (u_longlong_t)lr->lr_mtime[1],
5.201 ctime(&mtime));
5.202 @@ -220,7 +226,7 @@
5.203 static void
5.204 zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
5.205 {
5.206 - (void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
5.207 + (void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
5.208 (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
5.209 }
5.210
5.211 @@ -256,7 +262,7 @@
5.212 };
5.213
5.214 /* ARGSUSED */
5.215 -static void
5.216 +static int
5.217 print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
5.218 {
5.219 int txtype;
5.220 @@ -280,23 +286,24 @@
5.221
5.222 zil_rec_info[txtype].zri_count++;
5.223 zil_rec_info[0].zri_count++;
5.224 +
5.225 + return (0);
5.226 }
5.227
5.228 /* ARGSUSED */
5.229 -static void
5.230 +static int
5.231 print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
5.232 {
5.233 - char blkbuf[BP_SPRINTF_LEN];
5.234 + char blkbuf[BP_SPRINTF_LEN + 10];
5.235 int verbose = MAX(dump_opt['d'], dump_opt['i']);
5.236 char *claim;
5.237
5.238 if (verbose <= 3)
5.239 - return;
5.240 + return (0);
5.241
5.242 if (verbose >= 5) {
5.243 (void) strcpy(blkbuf, ", ");
5.244 - sprintf_blkptr(blkbuf + strlen(blkbuf),
5.245 - BP_SPRINTF_LEN - strlen(blkbuf), bp);
5.246 + sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
5.247 } else {
5.248 blkbuf[0] = '\0';
5.249 }
5.250 @@ -310,6 +317,8 @@
5.251
5.252 (void) printf("\tBlock seqno %llu, %s%s\n",
5.253 (u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
5.254 +
5.255 + return (0);
5.256 }
5.257
5.258 static void
5.259 @@ -342,16 +351,16 @@
5.260 int verbose = MAX(dump_opt['d'], dump_opt['i']);
5.261 int i;
5.262
5.263 - if (zh->zh_log.blk_birth == 0 || verbose < 2)
5.264 + if (zh->zh_log.blk_birth == 0 || verbose < 1)
5.265 return;
5.266
5.267 - (void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
5.268 - (u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
5.269 + (void) printf("\n ZIL header: claim_txg %llu, "
5.270 + "claim_blk_seq %llu, claim_lr_seq %llu",
5.271 + (u_longlong_t)zh->zh_claim_txg,
5.272 + (u_longlong_t)zh->zh_claim_blk_seq,
5.273 + (u_longlong_t)zh->zh_claim_lr_seq);
5.274 (void) printf(" replay_seq %llu, flags 0x%llx\n",
5.275 (u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
5.276 -
5.277 - if (verbose >= 4)
5.278 - print_log_bp(&zh->zh_log, "\n\tfirst block: ");
5.279
5.280 for (i = 0; i < TX_MAX_TYPE; i++)
5.281 zil_rec_info[i].zri_count = 0;
6.1 --- a/usr/src/cmd/zpool/zpool_main.c Fri Oct 30 18:47:17 2009 -0600
6.2 +++ b/usr/src/cmd/zpool/zpool_main.c Sun Nov 01 14:14:46 2009 -0800
6.3 @@ -250,12 +250,12 @@
6.4 {
6.5 FILE *fp = cb;
6.6
6.7 - (void) fprintf(fp, "\t%-13s ", zpool_prop_to_name(prop));
6.8 + (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop));
6.9
6.10 if (zpool_prop_readonly(prop))
6.11 (void) fprintf(fp, " NO ");
6.12 else
6.13 - (void) fprintf(fp, " YES ");
6.14 + (void) fprintf(fp, " YES ");
6.15
6.16 if (zpool_prop_values(prop) == NULL)
6.17 (void) fprintf(fp, "-\n");
6.18 @@ -302,7 +302,7 @@
6.19 (void) fprintf(fp,
6.20 gettext("\nthe following properties are supported:\n"));
6.21
6.22 - (void) fprintf(fp, "\n\t%-13s %s %s\n\n",
6.23 + (void) fprintf(fp, "\n\t%-15s %s %s\n\n",
6.24 "PROPERTY", "EDIT", "VALUES");
6.25
6.26 /* Iterate over all properties */
6.27 @@ -2449,7 +2449,7 @@
6.28 int ret;
6.29 list_cbdata_t cb = { 0 };
6.30 static char default_props[] =
6.31 - "name,size,used,available,capacity,health,altroot";
6.32 + "name,size,used,available,capacity,dedupratio,health,altroot";
6.33 char *props = default_props;
6.34
6.35 /* check options */
6.36 @@ -3672,9 +3672,12 @@
6.37 (void) printf(gettext(" 15 user/group space accounting\n"));
6.38 (void) printf(gettext(" 16 stmf property support\n"));
6.39 (void) printf(gettext(" 17 Triple-parity RAID-Z\n"));
6.40 - (void) printf(gettext(" 18 snapshot user holds\n"));
6.41 + (void) printf(gettext(" 18 Snapshot user holds\n"));
6.42 (void) printf(gettext(" 19 Log device removal\n"));
6.43 - (void) printf(gettext("For more information on a particular "
6.44 + (void) printf(gettext(" 20 Compression using zle "
6.45 + "(zero-length encoding)\n"));
6.46 + (void) printf(gettext(" 21 Deduplication\n"));
6.47 + (void) printf(gettext("\nFor more information on a particular "
6.48 "version, including supported releases, see:\n\n"));
6.49 (void) printf("http://www.opensolaris.org/os/community/zfs/"
6.50 "version/N\n\n");
7.1 --- a/usr/src/cmd/ztest/ztest.c Fri Oct 30 18:47:17 2009 -0600
7.2 +++ b/usr/src/cmd/ztest/ztest.c Sun Nov 01 14:14:46 2009 -0800
7.3 @@ -86,9 +86,8 @@
7.4 #include <sys/mman.h>
7.5 #include <sys/resource.h>
7.6 #include <sys/zio.h>
7.7 -#include <sys/zio_checksum.h>
7.8 -#include <sys/zio_compress.h>
7.9 #include <sys/zil.h>
7.10 +#include <sys/zil_impl.h>
7.11 #include <sys/vdev_impl.h>
7.12 #include <sys/vdev_file.h>
7.13 #include <sys/spa_impl.h>
7.14 @@ -106,6 +105,7 @@
7.15 #include <ctype.h>
7.16 #include <math.h>
7.17 #include <sys/fs/zfs.h>
7.18 +#include <libnvpair.h>
7.19
7.20 static char cmdname[] = "ztest";
7.21 static char *zopt_pool = cmdname;
7.22 @@ -127,112 +127,171 @@
7.23 static uint64_t zopt_time = 300; /* 5 minutes */
7.24 static int zopt_maxfaults;
7.25
7.26 +#define BT_MAGIC 0x123456789abcdefULL
7.27 +
7.28 +enum ztest_io_type { /* no explicit values: enumerators count up from 0 */
7.29 + ZTEST_IO_WRITE_TAG,
7.30 + ZTEST_IO_WRITE_PATTERN,
7.31 + ZTEST_IO_WRITE_ZEROES,
7.32 + ZTEST_IO_TRUNCATE,
7.33 + ZTEST_IO_SETATTR,
7.34 + ZTEST_IO_TYPES /* last enumerator, so it equals the count of those above (5) */
7.35 +};
7.36 +
7.37 typedef struct ztest_block_tag {
7.38 + uint64_t bt_magic;
7.39 uint64_t bt_objset;
7.40 uint64_t bt_object;
7.41 uint64_t bt_offset;
7.42 + uint64_t bt_gen;
7.43 uint64_t bt_txg;
7.44 - uint64_t bt_thread;
7.45 - uint64_t bt_seq;
7.46 + uint64_t bt_crtxg;
7.47 } ztest_block_tag_t;
7.48
7.49 -typedef struct ztest_args {
7.50 - char za_pool[MAXNAMELEN];
7.51 - spa_t *za_spa;
7.52 - objset_t *za_os;
7.53 - zilog_t *za_zilog;
7.54 - thread_t za_thread;
7.55 - uint64_t za_instance;
7.56 - uint64_t za_random;
7.57 - uint64_t za_diroff;
7.58 - uint64_t za_diroff_shared;
7.59 - uint64_t za_zil_seq;
7.60 - hrtime_t za_start;
7.61 - hrtime_t za_stop;
7.62 - hrtime_t za_kill;
7.63 - /*
7.64 - * Thread-local variables can go here to aid debugging.
7.65 - */
7.66 - ztest_block_tag_t za_rbt;
7.67 - ztest_block_tag_t za_wbt;
7.68 - dmu_object_info_t za_doi;
7.69 - dmu_buf_t *za_dbuf;
7.70 -} ztest_args_t;
7.71 -
7.72 -typedef void ztest_func_t(ztest_args_t *);
7.73 +typedef struct bufwad {
7.74 + uint64_t bw_index;
7.75 + uint64_t bw_txg;
7.76 + uint64_t bw_data;
7.77 +} bufwad_t;
7.78 +
7.79 +/*
7.80 + * XXX -- fix zfs range locks to be generic so we can use them here.
7.81 + */
7.82 +typedef enum {
7.83 + RL_READER,
7.84 + RL_WRITER,
7.85 + RL_APPEND
7.86 +} rl_type_t;
7.87 +
7.88 +typedef struct rll { /* one reader/writer lock built from a mutex and a condvar */
7.89 + void *rll_writer; /* curthread of the exclusive holder, NULL when none */
7.90 + int rll_readers; /* number of shared (RL_READER) holders */
7.91 + mutex_t rll_lock; /* held while rll_writer / rll_readers are examined or changed */
7.92 + cond_t rll_cv; /* waited on by blocked lockers; broadcast when the lock becomes free */
7.93 +} rll_t;
7.94 +
7.95 +typedef struct rl { /* handle returned by ztest_range_lock(), consumed by ztest_range_unlock() */
7.96 + uint64_t rl_object; /* object argument given to ztest_range_lock() */
7.97 + uint64_t rl_offset; /* offset argument given to ztest_range_lock() */
7.98 + uint64_t rl_size; /* size argument given to ztest_range_lock() */
7.99 + rll_t *rl_lock; /* the rll_t that was actually acquired for this range */
7.100 +} rl_t;
7.101 +
7.102 +#define ZTEST_RANGE_LOCKS 64
7.103 +#define ZTEST_OBJECT_LOCKS 64
7.104 +
7.105 +/*
7.106 + * Object descriptor. Used as a template for object lookup/create/remove.
7.107 + */
7.108 +typedef struct ztest_od {
7.109 + uint64_t od_dir;
7.110 + uint64_t od_object;
7.111 + dmu_object_type_t od_type;
7.112 + dmu_object_type_t od_crtype;
7.113 + uint64_t od_blocksize;
7.114 + uint64_t od_crblocksize;
7.115 + uint64_t od_gen;
7.116 + uint64_t od_crgen;
7.117 + char od_name[MAXNAMELEN];
7.118 +} ztest_od_t;
7.119 +
7.120 +/*
7.121 + * Per-dataset state.
7.122 + */
7.123 +typedef struct ztest_ds {
7.124 + objset_t *zd_os; /* objset passed to ztest_zd_init() */
7.125 + zilog_t *zd_zilog; /* dmu_objset_zil(zd_os), cached by ztest_zd_init() */
7.126 + uint64_t zd_seq; /* set to 0 by ztest_zd_init() */
7.127 + ztest_od_t *zd_od; /* debugging aid */
7.128 + char zd_name[MAXNAMELEN]; /* filled in by dmu_objset_name() in ztest_zd_init() */
7.129 + mutex_t zd_dirobj_lock; /* initialized/destroyed by ztest_zd_init()/ztest_zd_fini() */
7.130 + rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; /* indexed by object & (ZTEST_OBJECT_LOCKS - 1) */
7.131 + rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; /* indexed by a hash of object and offset */
7.132 +} ztest_ds_t;
7.133 +
7.134 +/*
7.135 + * Per-iteration state.
7.136 + */
7.137 +typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
7.138 +
7.139 +typedef struct ztest_info {
7.140 + ztest_func_t *zi_func; /* test function */
7.141 + uint64_t zi_iters; /* iterations per execution */
7.142 + uint64_t *zi_interval; /* execute every <interval> seconds */
7.143 + uint64_t zi_call_count; /* per-pass count */
7.144 + uint64_t zi_call_time; /* per-pass time */
7.145 + uint64_t zi_call_next; /* next time to call this function */
7.146 +} ztest_info_t;
7.147
7.148 /*
7.149 * Note: these aren't static because we want dladdr() to work.
7.150 */
7.151 ztest_func_t ztest_dmu_read_write;
7.152 -ztest_func_t ztest_dmu_read_write_zcopy;
7.153 ztest_func_t ztest_dmu_write_parallel;
7.154 ztest_func_t ztest_dmu_object_alloc_free;
7.155 ztest_func_t ztest_dmu_commit_callbacks;
7.156 ztest_func_t ztest_zap;
7.157 +ztest_func_t ztest_zap_parallel;
7.158 +ztest_func_t ztest_zil_commit;
7.159 +ztest_func_t ztest_dmu_read_write_zcopy;
7.160 +ztest_func_t ztest_dmu_objset_create_destroy;
7.161 +ztest_func_t ztest_dmu_prealloc;
7.162 ztest_func_t ztest_fzap;
7.163 -ztest_func_t ztest_zap_parallel;
7.164 -ztest_func_t ztest_traverse;
7.165 +ztest_func_t ztest_dmu_snapshot_create_destroy;
7.166 ztest_func_t ztest_dsl_prop_get_set;
7.167 -ztest_func_t ztest_dmu_objset_create_destroy;
7.168 -ztest_func_t ztest_dmu_snapshot_create_destroy;
7.169 -ztest_func_t ztest_dsl_dataset_promote_busy;
7.170 +ztest_func_t ztest_spa_prop_get_set;
7.171 ztest_func_t ztest_spa_create_destroy;
7.172 ztest_func_t ztest_fault_inject;
7.173 +ztest_func_t ztest_ddt_repair;
7.174 +ztest_func_t ztest_dmu_snapshot_hold;
7.175 ztest_func_t ztest_spa_rename;
7.176 +ztest_func_t ztest_scrub;
7.177 +ztest_func_t ztest_dsl_dataset_promote_busy;
7.178 ztest_func_t ztest_vdev_attach_detach;
7.179 ztest_func_t ztest_vdev_LUN_growth;
7.180 ztest_func_t ztest_vdev_add_remove;
7.181 ztest_func_t ztest_vdev_aux_add_remove;
7.182 -ztest_func_t ztest_scrub;
7.183 -ztest_func_t ztest_dmu_snapshot_hold;
7.184 -
7.185 -typedef struct ztest_info {
7.186 - ztest_func_t *zi_func; /* test function */
7.187 - uint64_t zi_iters; /* iterations per execution */
7.188 - uint64_t *zi_interval; /* execute every <interval> seconds */
7.189 - uint64_t zi_calls; /* per-pass count */
7.190 - uint64_t zi_call_time; /* per-pass time */
7.191 - uint64_t zi_call_total; /* cumulative total */
7.192 - uint64_t zi_call_target; /* target cumulative total */
7.193 -} ztest_info_t;
7.194 -
7.195 -uint64_t zopt_always = 0; /* all the time */
7.196 -uint64_t zopt_often = 1; /* every second */
7.197 -uint64_t zopt_sometimes = 10; /* every 10 seconds */
7.198 -uint64_t zopt_rarely = 60; /* every 60 seconds */
7.199 +
7.200 +uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
7.201 +uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
7.202 +uint64_t zopt_often = 1ULL * NANOSEC; /* every second */
7.203 +uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */
7.204 +uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */
7.205
7.206 ztest_info_t ztest_info[] = {
7.207 { ztest_dmu_read_write, 1, &zopt_always },
7.208 - { ztest_dmu_write_parallel, 30, &zopt_always },
7.209 + { ztest_dmu_write_parallel, 10, &zopt_always },
7.210 { ztest_dmu_object_alloc_free, 1, &zopt_always },
7.211 - { ztest_dmu_commit_callbacks, 10, &zopt_always },
7.212 + { ztest_dmu_commit_callbacks, 1, &zopt_always },
7.213 { ztest_zap, 30, &zopt_always },
7.214 - { ztest_fzap, 1, &zopt_always },
7.215 { ztest_zap_parallel, 100, &zopt_always },
7.216 - { ztest_dmu_read_write_zcopy, 1, &zopt_sometimes },
7.217 - { ztest_dsl_prop_get_set, 1, &zopt_sometimes },
7.218 - { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
7.219 - { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
7.220 - { ztest_spa_create_destroy, 1, &zopt_sometimes },
7.221 + { ztest_zil_commit, 1, &zopt_incessant },
7.222 + { ztest_dmu_read_write_zcopy, 1, &zopt_often },
7.223 + { ztest_dmu_objset_create_destroy, 1, &zopt_often },
7.224 + { ztest_dsl_prop_get_set, 1, &zopt_often },
7.225 + { ztest_spa_prop_get_set, 1, &zopt_sometimes },
7.226 +#if 0
7.227 + { ztest_dmu_prealloc, 1, &zopt_sometimes },
7.228 +#endif
7.229 + { ztest_fzap, 1, &zopt_sometimes },
7.230 + { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
7.231 + { ztest_spa_create_destroy, 1, &zopt_sometimes },
7.232 { ztest_fault_inject, 1, &zopt_sometimes },
7.233 + { ztest_ddt_repair, 1, &zopt_sometimes },
7.234 { ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
7.235 { ztest_spa_rename, 1, &zopt_rarely },
7.236 + { ztest_scrub, 1, &zopt_rarely },
7.237 + { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
7.238 { ztest_vdev_attach_detach, 1, &zopt_rarely },
7.239 { ztest_vdev_LUN_growth, 1, &zopt_rarely },
7.240 - { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
7.241 { ztest_vdev_add_remove, 1, &zopt_vdevtime },
7.242 { ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
7.243 - { ztest_scrub, 1, &zopt_vdevtime },
7.244 };
7.245
7.246 #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
7.247
7.248 -#define ZTEST_SYNC_LOCKS 16
7.249 -
7.250 /*
7.251 * The following struct is used to hold a list of uncalled commit callbacks.
7.252 - *
7.253 * The callbacks are ordered by txg number.
7.254 */
7.255 typedef struct ztest_cb_list {
7.256 @@ -244,28 +303,34 @@
7.257 * Stuff we need to share writably between parent and child.
7.258 */
7.259 typedef struct ztest_shared {
7.260 + char *zs_pool;
7.261 + spa_t *zs_spa;
7.262 + hrtime_t zs_proc_start;
7.263 + hrtime_t zs_proc_stop;
7.264 + hrtime_t zs_thread_start;
7.265 + hrtime_t zs_thread_stop;
7.266 + hrtime_t zs_thread_kill;
7.267 + uint64_t zs_enospc_count;
7.268 + uint64_t zs_vdev_next_leaf;
7.269 + uint64_t zs_vdev_aux;
7.270 + uint64_t zs_alloc;
7.271 + uint64_t zs_space;
7.272 mutex_t zs_vdev_lock;
7.273 rwlock_t zs_name_lock;
7.274 - uint64_t zs_vdev_next_leaf;
7.275 - uint64_t zs_vdev_aux;
7.276 - uint64_t zs_enospc_count;
7.277 - hrtime_t zs_start_time;
7.278 - hrtime_t zs_stop_time;
7.279 - uint64_t zs_alloc;
7.280 - uint64_t zs_space;
7.281 ztest_info_t zs_info[ZTEST_FUNCS];
7.282 - mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
7.283 - uint64_t zs_seq[ZTEST_SYNC_LOCKS];
7.284 + ztest_ds_t zs_zd[];
7.285 } ztest_shared_t;
7.286 +
7.287 +#define ID_PARALLEL -1ULL
7.288
7.289 static char ztest_dev_template[] = "%s/%s.%llua";
7.290 static char ztest_aux_template[] = "%s/%s.%s.%llu";
7.291 -static ztest_shared_t *ztest_shared;
7.292 +ztest_shared_t *ztest_shared;
7.293 +uint64_t *ztest_seq;
7.294
7.295 static int ztest_random_fd;
7.296 static int ztest_dump_core = 1;
7.297
7.298 -static uint64_t metaslab_sz;
7.299 static boolean_t ztest_exiting;
7.300
7.301 /* Global commit callback list */
7.302 @@ -273,13 +338,13 @@
7.303
7.304 extern uint64_t metaslab_gang_bang;
7.305 extern uint64_t metaslab_df_alloc_threshold;
7.306 -
7.307 -#define ZTEST_DIROBJ 1
7.308 -#define ZTEST_MICROZAP_OBJ 2
7.309 -#define ZTEST_FATZAP_OBJ 3
7.310 -
7.311 -#define ZTEST_DIROBJ_BLOCKSIZE (1 << 10)
7.312 -#define ZTEST_DIRSIZE 256
7.313 +static uint64_t metaslab_sz;
7.314 +
7.315 +enum ztest_object { /* replaces the former ZTEST_DIROBJ / ZTEST_*ZAP_OBJ #defines */
7.316 + ZTEST_META_DNODE = 0,
7.317 + ZTEST_DIROBJ, /* == 1, the same value the removed #define had */
7.318 + ZTEST_OBJECTS /* == 2, i.e. the number of enumerators above */
7.319 +};
7.320
7.321 static void usage(boolean_t) __NORETURN;
7.322
7.323 @@ -433,27 +498,6 @@
7.324 exit(requested ? 0 : 1);
7.325 }
7.326
7.327 -static uint64_t
7.328 -ztest_random(uint64_t range)
7.329 -{
7.330 - uint64_t r;
7.331 -
7.332 - if (range == 0)
7.333 - return (0);
7.334 -
7.335 - if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
7.336 - fatal(1, "short read from /dev/urandom");
7.337 -
7.338 - return (r % range);
7.339 -}
7.340 -
7.341 -/* ARGSUSED */
7.342 -static void
7.343 -ztest_record_enospc(char *s)
7.344 -{
7.345 - ztest_shared->zs_enospc_count++;
7.346 -}
7.347 -
7.348 static void
7.349 process_options(int argc, char **argv)
7.350 {
7.351 @@ -546,8 +590,38 @@
7.352
7.353 zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
7.354
7.355 - zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
7.356 + zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time * NANOSEC / zopt_vdevs :
7.357 + UINT64_MAX >> 2);
7.358 zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
7.359 +}
7.360 +
7.361 +static void
7.362 +ztest_kill(ztest_shared_t *zs) /* record pool usage in *zs, then SIGKILL this process */
7.363 +{
7.364 + zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(zs->zs_spa)); /* saved before dying */
7.365 + zs->zs_space = metaslab_class_get_space(spa_normal_class(zs->zs_spa)); /* saved before dying */
7.366 + (void) kill(getpid(), SIGKILL); /* signal is sent to our own pid */
7.367 +}
7.368 +
7.369 +static uint64_t
7.370 +ztest_random(uint64_t range) /* returns a value in [0, range); 0 when range is 0 */
7.371 +{
7.372 + uint64_t r;
7.373 +
7.374 + if (range == 0) /* avoids the modulo-by-zero below and skips the read entirely */
7.375 + return (0);
7.376 +
7.377 + if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r)) /* fill r from ztest_random_fd */
7.378 + fatal(1, "short read from /dev/urandom");
7.379 +
7.380 + return (r % range); /* plain modulo reduction of the 64-bit sample */
7.381 +}
7.382 +
7.383 +/* ARGSUSED */
7.384 +static void
7.385 +ztest_record_enospc(const char *s) /* s is accepted but not used in the body */
7.386 +{
7.387 + ztest_shared->zs_enospc_count++; /* plain (non-atomic) increment of the shared counter */
7.388 +}
7.389
7.390 static uint64_t
7.391 @@ -687,100 +761,805 @@
7.392 return (root);
7.393 }
7.394
7.395 -static void
7.396 -ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
7.397 -{
7.398 - int bs = SPA_MINBLOCKSHIFT +
7.399 - ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
7.400 - int ibs = DN_MIN_INDBLKSHIFT +
7.401 - ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
7.402 - int error;
7.403 -
7.404 - error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
7.405 - if (error) {
7.406 - char osname[300];
7.407 - dmu_objset_name(os, osname);
7.408 - fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
7.409 - osname, object, 1 << bs, ibs, error);
7.410 - }
7.411 -}
7.412 -
7.413 -static uint8_t
7.414 -ztest_random_checksum(void)
7.415 -{
7.416 - uint8_t checksum;
7.417 +static int
7.418 +ztest_random_blocksize(void) /* random power of two, 1 << SPA_MINBLOCKSHIFT .. 1 << SPA_MAXBLOCKSHIFT */
7.419 +{
7.420 + return (1 << (SPA_MINBLOCKSHIFT +
7.421 + ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); /* "+ 1" makes the max shift reachable */
7.422 +}
7.423 +
7.424 +static int
7.425 +ztest_random_ibshift(void) /* random shift in [DN_MIN_INDBLKSHIFT, DN_MAX_INDBLKSHIFT] */
7.426 +{
7.427 + return (DN_MIN_INDBLKSHIFT +
7.428 + ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); /* returns the shift, not 1 << shift */
7.429 +}
7.430 +
7.431 +static uint64_t
7.432 +ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
7.433 +{
7.434 + uint64_t top;
7.435 + vdev_t *rvd = spa->spa_root_vdev;
7.436 + vdev_t *tvd;
7.437 +
7.438 + ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
7.439
7.440 do {
7.441 - checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
7.442 - } while (zio_checksum_table[checksum].ci_zbt);
7.443 -
7.444 - if (checksum == ZIO_CHECKSUM_OFF)
7.445 - checksum = ZIO_CHECKSUM_ON;
7.446 -
7.447 - return (checksum);
7.448 -}
7.449 -
7.450 -static uint8_t
7.451 -ztest_random_compress(void)
7.452 -{
7.453 - return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
7.454 -}
7.455 -
7.456 -static int
7.457 -ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
7.458 -{
7.459 - dmu_tx_t *tx;
7.460 - int error;
7.461 + top = ztest_random(rvd->vdev_children);
7.462 + tvd = rvd->vdev_child[top];
7.463 + } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) ||
7.464 + tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL);
7.465 +
7.466 + return (top);
7.467 +}
7.468 +
7.469 +static uint64_t
7.470 +ztest_random_dsl_prop(zfs_prop_t prop) /* pick a random value for prop via zfs_prop_random_value() */
7.471 +{
7.472 + uint64_t value;
7.473 +
7.474 + do {
7.475 + value = zfs_prop_random_value(prop, ztest_random(-1ULL)); /* seed drawn from [0, 2^64 - 1) */
7.476 + } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); /* never return checksum=off */
7.477 +
7.478 + return (value);
7.479 +}
7.480 +
7.481 +static int
7.482 +ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, /* set prop on osname, then read it back */
7.483 + boolean_t inherit) /* returns ENOSPC if the set hit ENOSPC, otherwise the dsl_prop_set() result */
7.484 +{
7.485 + const char *propname = zfs_prop_to_name(prop);
7.486 + const char *valname;
7.487 + char setpoint[MAXPATHLEN];
7.488 + uint64_t curval;
7.489 + int error;
7.490 +
7.491 + error = dsl_prop_set(osname, propname, sizeof (value),
7.492 + inherit ? 0 : 1, &value); /* fourth argument is 0 when inherit is set, 1 otherwise */
7.493 +
7.494 + if (error == ENOSPC) { /* out of space is tolerated: count it and report it to the caller */
7.495 + ztest_record_enospc(FTAG);
7.496 + return (error);
7.497 + }
7.498 + ASSERT3U(error, ==, 0); /* any other failure is asserted against */
7.499 +
7.500 + VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
7.501 + 1, &curval, setpoint), ==, 0); /* read-back must succeed; also yields setpoint */
7.502 +
7.503 + if (zopt_verbose >= 6) { /* only at high verbosity: print the value now in effect */
7.504 + VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
7.505 + (void) printf("%s %s = %s at '%s'\n",
7.506 + osname, propname, valname, setpoint);
7.507 + }
7.508 +
7.509 + return (error);
7.510 +}
7.511 +
7.512 +#if 0 /* compiled out: nothing below is built */
7.513 +static int
7.514 +ztest_spa_prop_set_uint64(ztest_shared_t *zs, zpool_prop_t prop, uint64_t value) /* set one uint64 pool property */
7.515 +{
7.516 + spa_t *spa = zs->zs_spa;
7.517 + nvlist_t *props = NULL;
7.518 + int error;
7.519 +
7.520 + VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); /* one-entry nvlist: name -> value */
7.521 + VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
7.522 +
7.523 + error = spa_prop_set(spa, props);
7.524 +
7.525 + nvlist_free(props); /* freed on every path, before error is inspected */
7.526 +
7.527 + if (error == ENOSPC) { /* out of space is tolerated: count it and report it to the caller */
7.528 + ztest_record_enospc(FTAG);
7.529 + return (error);
7.530 + }
7.531 + ASSERT3U(error, ==, 0); /* any other failure is asserted against */
7.532 +
7.533 + return (error);
7.534 +}
7.535 +#endif
7.536 +
7.537 +static void
7.538 +ztest_rll_init(rll_t *rll) /* put *rll into the unlocked state and create its mutex/condvar */
7.539 +{
7.540 + rll->rll_writer = NULL; /* no exclusive holder */
7.541 + rll->rll_readers = 0; /* no shared holders */
7.542 + VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
7.543 + VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
7.544 +}
7.545 +
7.546 +static void
7.547 +ztest_rll_destroy(rll_t *rll) /* tear down the mutex/condvar of *rll; it must not be held */
7.548 +{
7.549 + ASSERT(rll->rll_writer == NULL); /* no exclusive holder may remain */
7.550 + ASSERT(rll->rll_readers == 0); /* no shared holders may remain */
7.551 + VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
7.552 + VERIFY(cond_destroy(&rll->rll_cv) == 0);
7.553 +}
7.554 +
7.555 +static void
7.556 +ztest_rll_lock(rll_t *rll, rl_type_t type) /* block until *rll is acquired: shared for RL_READER, exclusive otherwise */
7.557 +{
7.558 + VERIFY(mutex_lock(&rll->rll_lock) == 0);
7.559 +
7.560 + if (type == RL_READER) {
7.561 + while (rll->rll_writer != NULL) /* readers wait only for a writer to leave */
7.562 + (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
7.563 + rll->rll_readers++;
7.564 + } else { /* RL_WRITER and RL_APPEND both land here */
7.565 + while (rll->rll_writer != NULL || rll->rll_readers) /* wait until nobody holds the lock */
7.566 + (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
7.567 + rll->rll_writer = curthread; /* remember which thread owns it */
7.568 + }
7.569 +
7.570 + VERIFY(mutex_unlock(&rll->rll_lock) == 0);
7.571 +}
7.572 +
7.573 +static void
7.574 +ztest_rll_unlock(rll_t *rll) /* drop one hold on *rll; the hold type is inferred from the lock state */
7.575 +{
7.576 + VERIFY(mutex_lock(&rll->rll_lock) == 0);
7.577 +
7.578 + if (rll->rll_writer) { /* a writer is recorded, so this is treated as the writer's release */
7.579 + ASSERT(rll->rll_readers == 0);
7.580 + rll->rll_writer = NULL;
7.581 + } else { /* otherwise release one reader hold */
7.582 + ASSERT(rll->rll_readers != 0);
7.583 + ASSERT(rll->rll_writer == NULL);
7.584 + rll->rll_readers--;
7.585 + }
7.586 +
7.587 + if (rll->rll_writer == NULL && rll->rll_readers == 0) /* lock is now completely free */
7.588 + VERIFY(cond_broadcast(&rll->rll_cv) == 0); /* wake every waiter; each rechecks its condition */
7.589 +
7.590 + VERIFY(mutex_unlock(&rll->rll_lock) == 0);
7.591 +}
7.592 +
7.593 +static void
7.594 +ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) /* lock the stripe that object maps to */
7.595 +{
7.596 + rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; /* low bits of object select the slot */
7.597 +
7.598 + ztest_rll_lock(rll, type);
7.599 +}
7.600 +
7.601 +static void
7.602 +ztest_object_unlock(ztest_ds_t *zd, uint64_t object) /* release the stripe taken by ztest_object_lock() */
7.603 +{
7.604 + rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; /* same slot computation as the lock side */
7.605 +
7.606 + ztest_rll_unlock(rll);
7.607 +}
7.608 +
7.609 +static rl_t *
7.610 +ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, /* lock a hashed stripe for (object, offset) */
7.611 + uint64_t size, rl_type_t type) /* returns a newly allocated handle for ztest_range_unlock() */
7.612 +{
7.613 + uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); /* note the modulus is ZTEST_RANGE_LOCKS + 1 */
7.614 + rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; /* then masked down to a valid slot */
7.615 + rl_t *rl;
7.616 +
7.617 + rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); /* result is used without a NULL check */
7.618 + rl->rl_object = object;
7.619 + rl->rl_offset = offset;
7.620 + rl->rl_size = size; /* NOTE(review): size is only recorded; it takes no part in choosing the stripe */
7.621 + rl->rl_lock = rll;
7.622 +
7.623 + ztest_rll_lock(rll, type); /* may block; the handle is fully filled in before this point */
7.624 +
7.625 + return (rl);
7.626 +}
7.627 +
7.628 +static void
7.629 +ztest_range_unlock(rl_t *rl) /* release the stripe recorded in rl and free the handle */
7.630 +{
7.631 + rll_t *rll = rl->rl_lock;
7.632 +
7.633 + ztest_rll_unlock(rll);
7.634 +
7.635 + umem_free(rl, sizeof (*rl)); /* rl must not be used by the caller after this */
7.636 +}
7.637 +
7.638 +static void
7.639 +ztest_zd_init(ztest_ds_t *zd, objset_t *os) /* bind *zd to os and initialize all of its locks */
7.640 +{
7.641 + zd->zd_os = os;
7.642 + zd->zd_zilog = dmu_objset_zil(os);
7.643 + zd->zd_seq = 0;
7.644 + dmu_objset_name(os, zd->zd_name); /* writes the objset name into zd_name */
7.645 +
7.646 + VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
7.647 +
7.648 + for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) /* every object-lock stripe */
7.649 + ztest_rll_init(&zd->zd_object_lock[l]);
7.650 +
7.651 + for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) /* every range-lock stripe */
7.652 + ztest_rll_init(&zd->zd_range_lock[l]);
7.653 +}
7.654 +
7.655 +static void
7.656 +ztest_zd_fini(ztest_ds_t *zd) /* destroy the locks created by ztest_zd_init(); zd_os/zd_zilog are left alone */
7.657 +{
7.658 + VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
7.659 +
7.660 + for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++) /* every object-lock stripe */
7.661 + ztest_rll_destroy(&zd->zd_object_lock[l]);
7.662 +
7.663 + for (int l = 0; l < ZTEST_RANGE_LOCKS; l++) /* every range-lock stripe */
7.664 + ztest_rll_destroy(&zd->zd_range_lock[l]);
7.665 +}
7.666 +
7.667 +#define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) /* TXG_NOWAIT when ztest_random(10) is 0, else TXG_WAIT; re-rolled on every expansion */
7.668 +
7.669 +static uint64_t
7.670 +ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) /* returns the assigned txg, or 0 after aborting tx */
7.671 +{
7.672 + uint64_t txg;
7.673 + int error;
7.674 +
7.675 + /*
7.676 + * Attempt to assign tx to some transaction group.
7.677 + */
7.678 + error = dmu_tx_assign(tx, txg_how);
7.679 + if (error) {
7.680 + if (error == ERESTART) {
7.681 + ASSERT(txg_how == TXG_NOWAIT); /* ERESTART is only expected for TXG_NOWAIT */
7.682 + dmu_tx_wait(tx); /* wait before giving up on this tx */
7.683 + } else {
7.684 + ASSERT3U(error, ==, ENOSPC); /* the only other error anticipated here */
7.685 + ztest_record_enospc(tag);
7.686 + }
7.687 + dmu_tx_abort(tx); /* on any failure the tx is aborted here, not by the caller */
7.688 + return (0); /* 0 doubles as the failure indicator */
7.689 + }
7.690 + txg = dmu_tx_get_txg(tx);
7.691 + ASSERT(txg != 0); /* keeps success distinguishable from the 0 failure return */
7.692 + return (txg);
7.693 +}
7.694 +
7.695 +static void
7.696 +ztest_pattern_set(void *buf, uint64_t size, uint64_t value) /* fill buf with 64-bit copies of value */
7.697 +{
7.698 + uint64_t *ip = buf;
7.699 + uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); /* size is a byte count */
7.700 +
7.701 + while (ip < ip_end) /* NOTE(review): if size is not a multiple of 8 the last store extends past buf + size */
7.702 + *ip++ = value;
7.703 +}
7.704 +
7.705 +static boolean_t
7.706 +ztest_pattern_match(void *buf, uint64_t size, uint64_t value) /* true iff every 64-bit word of buf equals value */
7.707 +{
7.708 + uint64_t *ip = buf;
7.709 + uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); /* size is a byte count */
7.710 + uint64_t diff = 0;
7.711 +
7.712 + while (ip < ip_end) /* scans the whole buffer; no early exit on the first mismatch */
7.713 + diff |= (value - *ip++); /* any differing word leaves a nonzero bit in diff */
7.714 +
7.715 + return (diff == 0);
7.716 +}
7.717 +
7.718 +static void
7.719 +ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, /* fill in every field of *bt */
7.720 + uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
7.721 +{
7.722 + bt->bt_magic = BT_MAGIC; /* constant marker, later checked by ztest_bt_verify() */
7.723 + bt->bt_objset = dmu_objset_id(os); /* stores the objset id, not the pointer */
7.724 + bt->bt_object = object;
7.725 + bt->bt_offset = offset;
7.726 + bt->bt_gen = gen;
7.727 + bt->bt_txg = txg;
7.728 + bt->bt_crtxg = crtxg;
7.729 +}
7.730 +
7.731 +static void
7.732 +ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
7.733 + uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
7.734 +{
7.735 + ASSERT(bt->bt_magic == BT_MAGIC);
7.736