From df0ec4aceda8e33934eb53b05a1b84905ade5464 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Fri, 2 May 2014 16:58:39 -0500 Subject: [PATCH 07/26] qcow2: Metadata overlap checks RH-Author: Max Reitz Message-id: <1399049936-13496-8-git-send-email-mreitz@redhat.com> Patchwork-id: 58653 O-Subject: [RHEL-6.6 qemu-kvm PATCH v3 07/24] qcow2: Metadata overlap checks Bugzilla: 1004420 RH-Acked-by: Laszlo Ersek RH-Acked-by: Kevin Wolf RH-Acked-by: Stefan Hajnoczi BZ: 1004420 Two new functions are added; the first one checks a given range in the image file for overlaps with metadata (main header, L1 tables, L2 tables, refcount table and blocks). The second one should be used immediately before writing to the image file as it calls the first function and, upon collision, marks the image as corrupt and makes the BDS unusable, thereby preventing further access. Both functions take a bitmask argument specifying the structures which should be checked for overlaps, making it possible to also check metadata writes against colliding with other structures. Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf (cherry picked from commit a40f1c2add4d5f58d594f810fe36cabcf32bc4b0) Conflicts: block/qcow2-refcount.c block/qcow2.h include/monitor/monitor.h monitor.h block/qcow2-refcount.c conflicts because of two reasons: - different header names downstream and "monitor.h" not yet included - there is no way to mark an image corrupt without compat level 1.1, therefore we can only resort to asking the user kindly to use qemu-img check -r qcow2_process_discards() is missing in downstream qcow2.h. monitor.h has been moved to include/monitor/ upstream. Signed-off-by: Max Reitz --- block/qcow2-refcount.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 39 +++++++++++ monitor.c | 1 + monitor.h | 1 + 4 files changed, 213 insertions(+) Signed-off-by: Jeff E. Nelson --- block/qcow2-refcount.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 39 +++++++++++ monitor.c | 1 + monitor.h | 1 + 4 files changed, 213 insertions(+), 0 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 7770a9e..5abb196 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -25,6 +25,9 @@ #include "qemu-common.h" #include "block_int.h" #include "block/qcow2.h" +#include "range.h" +#include "qemu-objects.h" +#include "monitor.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size); static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, @@ -1232,3 +1235,172 @@ fail: return ret; } +#define overlaps_with(ofs, sz) \ + ranges_overlap(offset, size, ofs, sz) + +/* + * Checks if the given offset into the image file is actually free to use by + * looking for overlaps with important metadata sections (L1/L2 tables etc.), + * i.e. a sanity check without relying on the refcount tables. + * + * The chk parameter specifies exactly what checks to perform (being a bitmask + * of QCow2MetadataOverlap values). + * + * Returns: + * - 0 if writing to this offset will not affect the mentioned metadata + * - a positive QCow2MetadataOverlap value indicating one overlapping section + * - a negative value (-errno) indicating an error while performing a check, + * e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2 + */ +int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, + int64_t size) +{ + BDRVQcowState *s = bs->opaque; + int i, j; + + if (!size) { + return 0; + } + + if (chk & QCOW2_OL_MAIN_HEADER) { + if (offset < s->cluster_size) { + return QCOW2_OL_MAIN_HEADER; + } + } + + /* align range to test to cluster boundaries */ + size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size); + offset = start_of_cluster(s, offset); + + if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) { + if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) { + return QCOW2_OL_ACTIVE_L1; + } + } + + if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) { + if (overlaps_with(s->refcount_table_offset, + s->refcount_table_size * sizeof(uint64_t))) { + return QCOW2_OL_REFCOUNT_TABLE; + } + } + + if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) { + if (overlaps_with(s->snapshots_offset, s->snapshots_size)) { + return QCOW2_OL_SNAPSHOT_TABLE; + } + } + + if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + if (s->snapshots[i].l1_size && + overlaps_with(s->snapshots[i].l1_table_offset, + s->snapshots[i].l1_size * sizeof(uint64_t))) { + return QCOW2_OL_INACTIVE_L1; + } + } + } + + if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) { + for (i = 0; i < s->l1_size; i++) { + if ((s->l1_table[i] & L1E_OFFSET_MASK) && + overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_ACTIVE_L2; + } + } + } + + if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) { + for (i = 0; i < s->refcount_table_size; i++) { + if ((s->refcount_table[i] & REFT_OFFSET_MASK) && + overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK, + s->cluster_size)) { + return QCOW2_OL_REFCOUNT_BLOCK; + } + } + } + + if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) { + for (i = 0; i < s->nb_snapshots; i++) { + uint64_t l1_ofs = s->snapshots[i].l1_table_offset; + uint32_t l1_sz = s->snapshots[i].l1_size; + uint64_t *l1 = g_malloc(l1_sz * sizeof(uint64_t)); + int ret; + + ret = bdrv_read(bs->file, l1_ofs / BDRV_SECTOR_SIZE, (uint8_t *)l1, + l1_sz * sizeof(uint64_t) / BDRV_SECTOR_SIZE); + + if (ret < 0) { + g_free(l1); + return ret; + } + + for (j = 0; j < l1_sz; j++) { + if ((l1[j] & L1E_OFFSET_MASK) && + overlaps_with(l1[j] & L1E_OFFSET_MASK, s->cluster_size)) { + g_free(l1); + return QCOW2_OL_INACTIVE_L2; + } + } + + g_free(l1); + } + } + + return 0; +} + +static const char *metadata_ol_names[] = { + [QCOW2_OL_MAIN_HEADER_BITNR] = "qcow2_header", + [QCOW2_OL_ACTIVE_L1_BITNR] = "active L1 table", + [QCOW2_OL_ACTIVE_L2_BITNR] = "active L2 table", + [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table", + [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block", + [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table", + [QCOW2_OL_INACTIVE_L1_BITNR] = "inactive L1 table", + [QCOW2_OL_INACTIVE_L2_BITNR] = "inactive L2 table", +}; + +/* + * First performs a check for metadata overlaps (through + * qcow2_check_metadata_overlap); if that fails with a negative value (error + * while performing a check), that value is returned. If an impending overlap + * is detected, the BDS will be made unusable, the qcow2 file marked corrupt + * and -EIO returned. + * + * Returns 0 if there were neither overlaps nor errors while checking for + * overlaps; or a negative value (-errno) on error. + */ +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, + int64_t size) +{ + int ret = qcow2_check_metadata_overlap(bs, chk, offset, size); + + if (ret < 0) { + return ret; + } else if (ret > 0) { + int metadata_ol_bitnr = ffs(ret) - 1; + char *message; + QObject *data; + + assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR); + + fprintf(stderr, "qcow2: Preventing invalid write on metadata (overlaps " + "with %s); please use qemu-img check -r.\n", + metadata_ol_names[metadata_ol_bitnr]); + message = g_strdup_printf("Prevented %s overwrite", + metadata_ol_names[metadata_ol_bitnr]); + data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %" + PRId64 ", 'size': %" PRId64 " }", bs->device_name, message, + offset, size); + monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data); + g_free(message); + qobject_decref(data); + + bs->drv = NULL; /* make BDS unusable */ + return -EIO; + } + + return 0; +} diff --git a/block/qcow2.h b/block/qcow2.h index 832bbfb..dc437ae 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -200,6 +200,40 @@ enum { QCOW2_CLUSTER_COMPRESSED, }; +typedef enum QCow2MetadataOverlap { + QCOW2_OL_MAIN_HEADER_BITNR = 0, + QCOW2_OL_ACTIVE_L1_BITNR = 1, + QCOW2_OL_ACTIVE_L2_BITNR = 2, + QCOW2_OL_REFCOUNT_TABLE_BITNR = 3, + QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4, + QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5, + QCOW2_OL_INACTIVE_L1_BITNR = 6, + QCOW2_OL_INACTIVE_L2_BITNR = 7, + + QCOW2_OL_MAX_BITNR = 8, + + QCOW2_OL_NONE = 0, + QCOW2_OL_MAIN_HEADER = (1 << QCOW2_OL_MAIN_HEADER_BITNR), + QCOW2_OL_ACTIVE_L1 = (1 << QCOW2_OL_ACTIVE_L1_BITNR), + QCOW2_OL_ACTIVE_L2 = (1 << QCOW2_OL_ACTIVE_L2_BITNR), + QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR), + QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR), + QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR), + QCOW2_OL_INACTIVE_L1 = (1 << QCOW2_OL_INACTIVE_L1_BITNR), + /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv + * reads. */ + QCOW2_OL_INACTIVE_L2 = (1 << QCOW2_OL_INACTIVE_L2_BITNR), +} QCow2MetadataOverlap; + +/* Perform all overlap checks which don't require disk access */ +#define QCOW2_OL_CACHED \ + (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_ACTIVE_L2 | \ + QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_REFCOUNT_BLOCK | \ + QCOW2_OL_SNAPSHOT_TABLE | QCOW2_OL_INACTIVE_L1) + +/* The default checks to perform */ +#define QCOW2_OL_DEFAULT QCOW2_OL_CACHED + #define L1E_OFFSET_MASK 0x00ffffffffffff00ULL #define L2E_OFFSET_MASK 0x00ffffffffffff00ULL #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL @@ -275,6 +309,11 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); +int qcow2_check_metadata_overlap(BlockDriverState *bs, int chk, int64_t offset, + int64_t size); +int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset, + int64_t size); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size); void qcow2_l2_cache_reset(BlockDriverState *bs); diff --git a/monitor.c b/monitor.c index 2d78c2d..7081671 100644 --- a/monitor.c +++ b/monitor.c @@ -508,6 +508,7 @@ static const char *monitor_event_names[] = { [QEVENT_BALLOON_CHANGE] = "BALLOON_CHANGE", [QEVENT_SPICE_MIGRATE_COMPLETED] = "SPICE_MIGRATE_COMPLETED", [QEVENT_GUEST_PANICKED] = "GUEST_PANICKED", + [QEVENT_BLOCK_IMAGE_CORRUPTED] = "BLOCK_IMAGE_CORRUPTED", }; QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) diff --git a/monitor.h b/monitor.h index 4efb0d7..4b4a00d 100644 --- a/monitor.h +++ b/monitor.h @@ -50,6 +50,7 @@ typedef enum MonitorEvent { QEVENT_BALLOON_CHANGE, QEVENT_SPICE_MIGRATE_COMPLETED, QEVENT_GUEST_PANICKED, + QEVENT_BLOCK_IMAGE_CORRUPTED, /* Add to 'monitor_event_names' array in monitor.c when * defining new events here */ -- 1.7.1