4 * Extract information from Windows NT registry hives.
8 * Copyright (C) 2016 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
30 #include "wimlib/encoding.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/registry.h"
34 #include "wimlib/util.h"
36 /* Registry hive file header */
/* Value the header's magic field must hold; hive_validate() compares against
 * it. */
38 #define REGF_MAGIC cpu_to_le32(0x66676572) /* "regf" */
/* The only major_version value that hive_validate() accepts. */
41 #define REGF_MAJOR cpu_to_le32(1)
45 le32 root_key_offset; /* Offset, in hbin area, to root nk */
46 le32 total_hbin_size; /* Total size of all hbins */
/* Cell offsets are resolved by indexing into this array; see
 * get_cell_pointer(). */
48 u8 hbin_area[0]; /* Start of hbin area */
54 /* The cell size in bytes, negated for in-use cells */
57 /* Magic characters which identify the cell type */
61 /* NK cell - represents a registry key */
/* Value base.magic must hold for a cell to be accepted as an nk cell. */
63 #define NK_MAGIC cpu_to_le16(0x6B6E) /* "nk" */
/* Flag bit tested on nk->flags.  The result is passed as the last argument of
 * names_equal() and selects the byte-copy path in append_subkey_name(). */
65 #define NK_COMPRESSED_NAME cpu_to_le16(0x0020)
/* Offset of the key's subkey list cell; iterate_subkeys() starts from it. */
72 le32 subkey_list_offset;
/* Offset of the key's value list cell; lookup_value() reads it. */
75 le32 value_list_offset;
89 /* Subkey list cell. There are four types. LF, LH, and LI cells reference
90 * subkey NK cells directly, while RI cells reference other subkey lists. All
91 * contain a count followed by that many 32-bit offsets. But LF and LH cells
92 * contain a 32-bit hash along with each offset, while LI and RI cells only
 * contain the offsets. */
/* Magic values that identify the kind of a subkey list cell.
 * iterate_subkeys_recursive() tests for LF_MAGIC or LH_MAGIC when sizing the
 * element array ("Hashes are included"). */
95 #define LF_MAGIC cpu_to_le16(0x666C) /* "lf" */
96 #define LH_MAGIC cpu_to_le16(0x686C) /* "lh" */
97 #define LI_MAGIC cpu_to_le16(0x696C) /* "li" */
98 #define RI_MAGIC cpu_to_le16(0x6972) /* "ri" */
104 /* Value list cell - contains a list of value references */
110 /* VK cell - contains a value's data, or a reference to it */
/* 0x6B76 is the byte pair 'v','k' read as a little-endian 16-bit value;
 * lookup_value() requires it in base.magic. */
112 #define VK_MAGIC cpu_to_le16(0x6B76)
/* Value data types.  Each constant is wrapped in cpu_to_le32(), i.e. kept in
 * on-disk byte order, matching the le32 type that retrieve_value() hands back
 * from vk->data_type. */
117 #define REG_NONE cpu_to_le32(0)
118 #define REG_SZ cpu_to_le32(1)
119 #define REG_EXPAND_SZ cpu_to_le32(2)
120 #define REG_BINARY cpu_to_le32(3)
/* REG_DWORD and REG_DWORD_LITTLE_ENDIAN are two names for the same value. */
121 #define REG_DWORD cpu_to_le32(4)
122 #define REG_DWORD_LITTLE_ENDIAN cpu_to_le32(4)
123 #define REG_DWORD_BIG_ENDIAN cpu_to_le32(5)
124 #define REG_LINK cpu_to_le32(6)
125 #define REG_MULTI_SZ cpu_to_le32(7)
126 #define REG_RESOURCE_LIST cpu_to_le32(8)
127 #define REG_FULL_RESOURCE_DESCRIPTOR cpu_to_le32(9)
128 #define REG_RESOURCE_REQUIREMENTS_LIST cpu_to_le32(10)
/* REG_QWORD and REG_QWORD_LITTLE_ENDIAN are two names for the same value. */
129 #define REG_QWORD cpu_to_le32(11)
130 #define REG_QWORD_LITTLE_ENDIAN cpu_to_le32(11)
/* Flag bit tested on vk->flags; lookup_value() passes the result as the last
 * argument of names_equal(). */
132 #define VK_COMPRESSED_NAME cpu_to_le16(0x0001)
138 /* Data cell - contains a value's data */
144 /* Arbitrary limits for safety */
/* Upper bound on nk->num_values, checked in lookup_value(). */
145 #define MAX_VALUES 65536
/* Upper bound on nk->num_subkeys, checked in iterate_subkeys() and
 * hive_list_subkeys(). */
146 #define MAX_SUBKEYS 65536
/* Initial value of subkey_iteration_stats.levels_remaining. */
147 #define MAX_SUBKEY_LIST_LEVELS 5
/* Initial value of subkey_iteration_stats.subkey_lists_remaining. */
148 #define MAX_SUBKEY_LISTS 4096
/* Map a wimlib error code (@ret) onto the hive_status values used by the
 * functions in this file. */
150 static enum hive_status
151 translate_wimlib_error(int ret)
/* Memory exhaustion keeps its own status so callers can tell it apart. */
155 if (ret == WIMLIB_ERR_NOMEM)
156 return HIVE_OUT_OF_MEMORY;
/* Any code that reaches this point is reported as unsupported. */
157 return HIVE_UNSUPPORTED;
160 /* Compare a UTF-16LE name with a key or value name in the registry. The
161 * comparison is case insensitive. */
/* @name, @name_nchars: the UTF-16LE name being searched for and its length in
 * characters.
 * @disk_name, @disk_name_size: the name as stored in the hive cell and the
 * length it is checked against.
 * Two on-disk encodings are handled, one per branch below. */
163 names_equal(const utf16lechar *name, size_t name_nchars,
164 const void *disk_name, size_t disk_name_size,
168 /* ISO-8859-1 (LATIN1) on-disk */
169 const u8 *p = disk_name;
/* Lengths are checked before the per-character loop, which indexes both
 * strings up to name_nchars. */
170 if (disk_name_size != name_nchars)
172 for (size_t i = 0; i < name_nchars; i++)
/* Both sides go through the upcase table, so case differences are ignored. */
173 if (upcase[le16_to_cpu(name[i])] != upcase[p[i]])
177 /* UTF-16LE on disk */
179 if (disk_name_size != name_nchars)
/* The comparison result is negated, so a zero result means "equal".
 * NOTE(review): the trailing `true` presumably requests case-insensitive
 * comparison - confirm against the declaration of cmp_utf16le_strings(). */
181 return !cmp_utf16le_strings(name, name_nchars,
182 disk_name, disk_name_size, true);
186 /* Get a pointer to a cell, with alignment and bounds checking. Returns NULL if
187 * the requested information does not specify a properly aligned, sized, and
 * in-use cell. */
/* Resolve @offset (on-disk little-endian, relative to regf->hbin_area) to a
 * cell pointer, requiring room for at least @wanted_size bytes. */
190 get_cell_pointer(const struct regf *regf, le32 offset, size_t wanted_size)
192 u32 total = le32_to_cpu(regf->total_hbin_size);
193 u32 offs = le32_to_cpu(offset);
194 const struct cell *cell;
/* Three conditions are checked: the offset lies past the hbin area, the offset
 * is not a multiple of 8, or @wanted_size bytes starting at the offset would
 * run past the end of the hbin area.  `offs > total` is tested first so that
 * `total - offs` cannot wrap. */
197 if ((offs > total) || (offs & 7) || (wanted_size > total - offs))
/* NOTE(review): "®f" looks like a mis-decoded "&regf" (taking the address of
 * regf->hbin_area[offs]); confirm against the upstream file and restore it
 * before compiling. */
200 cell = (const struct cell *)®f->hbin_area[offs];
/* The stored size is signed: the checks below treat a non-negative value as
 * "not in use" and use the negated value as the cell's length in bytes. */
201 actual_size = le32_to_cpu(cell->size);
202 if (actual_size >= 0) /* Cell not in use? */
204 if (wanted_size > -actual_size) /* Cell too small? */
209 /* Revalidate the cell with its full length. Returns true iff the cell is
 * valid, i.e. get_cell_pointer() accepts it at the requested size. */
/* Thin wrapper: repeats the get_cell_pointer() checks with a new
 * @wanted_size.  Callers use it once a length field read from the cell shows
 * how large the cell must really be. */
212 revalidate_cell(const struct regf *regf, le32 offset, size_t wanted_size)
214 return get_cell_pointer(regf, offset, wanted_size) != NULL;
/* Budgets shared across one subkey walk.  iterate_subkeys() initializes all
 * three fields and iterate_subkeys_recursive() updates them in place. */
217 struct subkey_iteration_stats {
219 /* The number of additional levels of descendent subkey lists that may
220 * be visited (currently, i.e. at this point in the iteration) before
221 * our safety limit of MAX_SUBKEY_LIST_LEVELS is reached */
222 u32 levels_remaining;
224 /* The number of additional subkey lists that may be visited until our
225 * safety limit of MAX_SUBKEY_LISTS is reached */
226 u32 subkey_lists_remaining;
228 /* The number of subkeys remaining to be found. Since the number of
229 * subkeys is known from the parent nk cell, this should be 0 at the end
230 * of the iteration. */
231 u32 subkeys_remaining;
/* Callback type for subkey iteration.  It receives a validated nk cell and the
 * caller's opaque context pointer, and returns a hive_status that
 * iterate_subkeys_recursive() checks against HIVE_OK. */
234 typedef enum hive_status (*subkey_cb_t)(const struct nk *, void *);
/* Walk the subkey list cell at @subkey_list_offset.  @cb is called with
 * @cb_ctx on each nk cell the list references directly.  Lists that reference
 * other lists are followed by recursion.  @stats carries the remaining safety
 * budgets and is updated in place. */
236 static enum hive_status
237 iterate_subkeys_recursive(const struct regf *regf, le32 subkey_list_offset,
238 subkey_cb_t cb, void *cb_ctx,
239 struct subkey_iteration_stats *stats)
241 const struct subkey_list *list;
242 unsigned num_offsets;
246 enum hive_status status;
/* Both the depth budget and the list-count budget are tested before the list
 * is touched; visiting this list then uses up one unit of the list budget. */
248 if (stats->levels_remaining == 0 || stats->subkey_lists_remaining == 0)
251 stats->subkey_lists_remaining--;
/* Validate only the fixed-size header first so that num_offsets can be read. */
253 list = get_cell_pointer(regf, subkey_list_offset,
254 sizeof(struct subkey_list));
258 num_offsets = le16_to_cpu(list->num_offsets);
/* Size of the element array implied by num_offsets. */
259 extra_size = num_offsets * sizeof(list->elements[0]);
262 if (list->base.magic == LF_MAGIC || list->base.magic == LH_MAGIC) {
263 /* Hashes are included */
/* With the full length known, re-check that the cell is really that large. */
268 if (!revalidate_cell(regf, subkey_list_offset,
269 sizeof(struct subkey_list) + extra_size))
274 switch (list->base.magic) {
278 /* Children are subkeys */
/* subkeys_remaining starts at the count declared by the parent nk cell, so
 * this detects a list holding more entries than are still expected. */
279 if (stats->subkeys_remaining < num_offsets)
281 stats->subkeys_remaining -= num_offsets;
282 while (num_offsets--) {
283 const struct nk *sub_nk;
/* Each referenced cell is validated twice: once for the fixed nk layout and
 * magic, then again including the name bytes that follow it. */
285 sub_nk = get_cell_pointer(regf, *p, sizeof(struct nk));
286 if (!sub_nk || sub_nk->base.magic != NK_MAGIC)
289 if (!revalidate_cell(regf, *p, sizeof(struct nk) +
290 le16_to_cpu(sub_nk->name_size)))
/* Only a fully validated subkey is handed to the callback. */
293 status = (*cb)(sub_nk, cb_ctx);
294 if (status != HIVE_OK)
300 /* Children are subkey lists */
/* One level of the depth budget is taken for the nested lists and given back
 * after the loop. */
302 stats->levels_remaining--;
303 while (num_offsets--) {
304 status = iterate_subkeys_recursive(regf, *p++,
306 if (status != HIVE_OK)
309 stats->levels_remaining++;
/* NOTE(review): presumably the switch's fallback for an unrecognized list
 * magic - the enclosing case label is not visible here; confirm. */
312 return HIVE_UNSUPPORTED;
316 /* Call @cb on each subkey cell of the key @nk. */
/* @cb_ctx is the opaque pointer given to each callback invocation.  The walk
 * is bounded by MAX_SUBKEYS, MAX_SUBKEY_LIST_LEVELS and MAX_SUBKEY_LISTS. */
317 static enum hive_status
318 iterate_subkeys(const struct regf *regf, const struct nk *nk,
319 subkey_cb_t cb, void *cb_ctx)
321 u32 num_subkeys = le32_to_cpu(nk->num_subkeys);
322 struct subkey_iteration_stats stats;
323 enum hive_status status;
/* A key with no subkeys and a key claiming more than MAX_SUBKEYS are both
 * tested before the subkey list is looked at. */
325 if (num_subkeys == 0)
328 if (num_subkeys > MAX_SUBKEYS)
/* Start the walk with full budgets and the subkey count taken from @nk. */
331 stats.levels_remaining = MAX_SUBKEY_LIST_LEVELS;
332 stats.subkey_lists_remaining = MAX_SUBKEY_LISTS;
333 stats.subkeys_remaining = num_subkeys;
335 status = iterate_subkeys_recursive(regf, nk->subkey_list_offset,
/* The walk succeeded but found fewer subkeys than @nk declared, so the hive is
 * reported as corrupt.  A non-OK status from the walk is left untouched. */
337 if (stats.subkeys_remaining != 0 && status == HIVE_OK)
338 status = HIVE_CORRUPT;
/* Context passed from lookup_subkey() to lookup_subkey_cb(). */
342 struct lookup_subkey_ctx {
/* Name component being searched for (UTF-16LE) and its length in characters;
 * both are passed to names_equal(). */
343 const utf16lechar *key_name;
344 size_t key_name_nchars;
/* Set by lookup_subkey_cb() to the matching subkey cell. */
345 const struct nk *result;
/* subkey_cb_t callback used by lookup_subkey().  It compares @sub_nk's name
 * with the name stored in the lookup_subkey_ctx given as @_ctx. */
348 static enum hive_status
349 lookup_subkey_cb(const struct nk *sub_nk, void *_ctx)
351 struct lookup_subkey_ctx *ctx = _ctx;
/* The NK_COMPRESSED_NAME flag bit is forwarded as the last argument so that
 * names_equal() knows how the on-disk name is encoded. */
353 if (names_equal(ctx->key_name, ctx->key_name_nchars,
354 sub_nk->name, le16_to_cpu(sub_nk->name_size),
355 (sub_nk->flags & NK_COMPRESSED_NAME)))
/* Record the match and return a status other than HIVE_OK; lookup_subkey()
 * treats a plain HIVE_OK from the walk as "not found". */
357 ctx->result = sub_nk;
358 return HIVE_ITERATION_STOPPED;
/*
365 * Given a registry key cell @nk, look up the next component of the key
366 * *key_namep. If found, return HIVE_OK, advance *key_namep past the key name
367 * component, and return the subkey cell in @sub_nk_ret. Otherwise, return
368 * another HIVE_* error code.
 */
/* Resolve one path component: find the subkey of @nk whose name matches the
 * leading component of *key_namep. */
370 static enum hive_status
371 lookup_subkey(const struct regf *regf, const utf16lechar **key_namep,
372 const struct nk *nk, const struct nk **sub_nk_ret)
374 const utf16lechar *key_name = *key_namep;
375 size_t key_name_nchars = 0;
376 struct lookup_subkey_ctx ctx;
377 enum hive_status status;
/* The component ends at the first NUL or backslash character. */
379 while (key_name[key_name_nchars] != cpu_to_le16('\0') &&
380 key_name[key_name_nchars] != cpu_to_le16('\\'))
383 ctx.key_name = key_name;
384 ctx.key_name_nchars = key_name_nchars;
387 status = iterate_subkeys(regf, nk, lookup_subkey_cb, &ctx);
/* The callback returns HIVE_ITERATION_STOPPED on a match, so HIVE_OK here
 * means every subkey was visited without one matching. */
389 if (status == HIVE_OK)
390 status = HIVE_KEY_NOT_FOUND;
/* Step over the component just matched, then test for backslashes after it. */
394 key_name += key_name_nchars;
395 while (*key_name == cpu_to_le16('\\'))
397 *key_namep = key_name;
398 *sub_nk_ret = ctx.result;
402 /* Find the nk cell for the key named @key_name in the registry hive @regf. */
/* The path is resolved one component at a time, starting from the root nk
 * cell named by regf->root_key_offset. */
403 static enum hive_status
404 lookup_key(const struct regf *regf, const tchar *key_name,
405 const struct nk **nk_ret)
408 enum hive_status status;
409 const utf16lechar *key_uname, *key_unamep;
/* The root cell must be a valid, in-use cell carrying the nk magic. */
411 nk = get_cell_pointer(regf, regf->root_key_offset, sizeof(struct nk));
412 if (!nk || nk->base.magic != NK_MAGIC)
/* Obtain a UTF-16LE form of @key_name.  It is handed back with
 * tstr_put_utf16le() further down. */
415 status = translate_wimlib_error(tstr_get_utf16le(key_name, &key_uname));
416 if (status != HIVE_OK)
/* Each iteration asks lookup_subkey() for one component; that call advances
 * key_unamep and replaces nk with the subkey it found. */
418 key_unamep = key_uname;
419 while (*key_unamep) {
420 status = lookup_subkey(regf, &key_unamep, nk, &nk);
421 if (status != HIVE_OK)
427 tstr_put_utf16le(key_uname);
431 /* Find the vk cell for the value named @value_name of the key named @key_name
432 * in the registry hive @regf. */
/* Returns HIVE_VALUE_NOT_FOUND when the key has no values or no entry in its
 * value list matches @value_name. */
433 static enum hive_status
434 lookup_value(const struct regf *regf, const tchar *key_name,
435 const tchar *value_name, const struct vk **vk_ret)
437 enum hive_status status;
440 const struct value_list *value_list;
441 const utf16lechar *value_uname;
442 size_t value_uname_nchars;
444 /* Look up the nk cell for the key. */
445 status = lookup_key(regf, key_name, &nk);
446 if (status != HIVE_OK)
449 num_values = le32_to_cpu(nk->num_values);
451 if (num_values == 0) /* No values? */
452 return HIVE_VALUE_NOT_FOUND;
/* Bound the count before it is used to size the value list request. */
454 if (num_values > MAX_VALUES)
457 value_list = get_cell_pointer(regf, nk->value_list_offset,
458 sizeof(struct value_list) +
460 sizeof(value_list->vk_offsets[0])));
464 /* Look for the value in the value list. */
466 status = translate_wimlib_error(
467 tstr_get_utf16le_and_len(value_name, &value_uname,
468 &value_uname_nchars));
469 if (status != HIVE_OK)
/* NOTE(review): presumably the helper reports a length in bytes and this
 * halving converts it to UTF-16 code units - confirm against its
 * declaration. */
471 value_uname_nchars /= 2;
473 for (size_t i = 0; i < num_values; i++) {
/* status is preset to HIVE_CORRUPT ahead of the cell validation checks. */
477 status = HIVE_CORRUPT;
478 vk = get_cell_pointer(regf, value_list->vk_offsets[i],
480 if (!vk || vk->base.magic != VK_MAGIC)
483 name_size = le16_to_cpu(vk->name_size);
/* Re-check the cell now that the length of the trailing name is known. */
485 if (!revalidate_cell(regf, value_list->vk_offsets[i],
486 sizeof(struct vk) + name_size))
489 if (names_equal(value_uname, value_uname_nchars,
491 (vk->flags & VK_COMPRESSED_NAME)))
499 status = HIVE_VALUE_NOT_FOUND;
/* Hand back the UTF-16LE string obtained above. */
501 tstr_put_utf16le(value_uname);
/*
506 * Retrieve the data of the value named @value_name of the key named @key_name
507 * in the registry hive @regf. If the value was found, return HIVE_OK and
508 * return the data, its size, and its type in @data_ret, @data_size_ret, and
509 * @data_type_ret. Otherwise, return another HIVE_* error code.
 */
/* Locate a value and return a copy of its data.  *data_ret receives the
 * pointer returned by memdup().  *data_type_ret receives vk->data_type
 * unconverted, i.e. still in on-disk le32 form. */
511 static enum hive_status
512 retrieve_value(const struct regf *regf, const tchar *key_name,
513 const tchar *value_name, void **data_ret,
514 size_t *data_size_ret, le32 *data_type_ret)
516 enum hive_status status;
522 /* Find the vk cell. */
523 status = lookup_value(regf, key_name, value_name, &vk);
524 if (status != HIVE_OK)
527 /* Extract the value data from the vk cell (for inline data) or from the
528 * data cell which it references (for non-inline data). */
530 data_size = le32_to_cpu(vk->data_size);
/* The top bit of the stored size is the inline flag; the low 31 bits are the
 * data length. */
532 is_inline = (data_size & 0x80000000);
533 data_size &= 0x7FFFFFFF;
535 if (data_size > 1048576) /* Arbitrary limit */
/* Here the data pointer is aimed at the data_offset field itself rather than
 * at a separate cell. */
541 data = &vk->data_offset;
543 const struct data_cell *data_cell;
/* Validate the data cell header first, then the header plus data_size bytes. */
545 data_cell = get_cell_pointer(regf, vk->data_offset,
546 sizeof(struct data_cell));
550 if (!revalidate_cell(regf, vk->data_offset,
551 sizeof(struct data_cell) + data_size))
552 return HIVE_UNSUPPORTED; /* Possibly a big data cell */
554 data = data_cell->data;
/* The caller gets its own copy rather than a pointer into the hive. */
557 *data_ret = memdup(data, data_size);
559 return HIVE_OUT_OF_MEMORY;
560 *data_size_ret = data_size;
561 *data_type_ret = vk->data_type;
565 /* Validate the registry hive file given in memory as @hive_mem and @hive_size.
566 * If valid, return HIVE_OK. If invalid, return another HIVE_* error code. */
/* Only the file header is examined here: its size, magic, major version, and
 * the declared hbin area size. */
568 hive_validate(const void *hive_mem, size_t hive_size)
570 const struct regf *regf = hive_mem;
/* Compile-time check that struct regf is exactly 4096 bytes. */
572 STATIC_ASSERT(sizeof(struct regf) == 4096);
/* The buffer must hold a whole header before any field is read.  This check
 * also keeps the subtraction in the last test from wrapping. */
574 if (hive_size < sizeof(struct regf))
577 if (regf->magic != REGF_MAGIC || regf->major_version != REGF_MAJOR)
578 return HIVE_UNSUPPORTED;
/* The declared hbin area must fit in what remains of the buffer after the
 * header. */
580 if (le32_to_cpu(regf->total_hbin_size) > hive_size - sizeof(struct regf))
586 /* Get a string value from the registry hive file. */
/* On success the converted string is stored through @value_ret.  A value whose
 * type is not handled as a string gives HIVE_VALUE_IS_WRONG_TYPE. */
588 hive_get_string(const struct regf *regf, const tchar *key_name,
589 const tchar *value_name, tchar **value_ret)
594 enum hive_status status;
596 /* Retrieve the raw value data. */
597 status = retrieve_value(regf, key_name, value_name,
598 &data, &data_size, &data_type);
599 if (status != HIVE_OK)
602 /* Interpret the data as a string, when possible. */
/* The raw bytes are converted from UTF-16LE to the library's tchar string
 * type; a conversion failure is mapped through translate_wimlib_error(). */
606 status = translate_wimlib_error(
607 utf16le_to_tstr(data, data_size, value_ret, &data_size));
610 status = HIVE_VALUE_IS_WRONG_TYPE;
617 /* Get a number value from the registry hive file. */
/* Three value types are handled below: 32-bit little-endian, 32-bit
 * big-endian, and 64-bit little-endian.  The result is stored through
 * @value_ret as an s64.  Data of an unexpected length gives HIVE_CORRUPT and
 * any other value type gives HIVE_VALUE_IS_WRONG_TYPE. */
619 hive_get_number(const struct regf *regf, const tchar *key_name,
620 const tchar *value_name, s64 *value_ret)
625 enum hive_status status;
627 /* Retrieve the raw value data. */
628 status = retrieve_value(regf, key_name, value_name,
629 &data, &data_size, &data_type);
630 if (status != HIVE_OK)
633 /* Interpret the data as a number, when possible. */
/* The case labels are REG_* constants, which are defined in on-disk byte
 * order. */
635 case REG_DWORD_LITTLE_ENDIAN:
/* A DWORD must be exactly 4 bytes long. */
636 if (data_size == 4) {
637 *value_ret = le32_to_cpu(*(le32 *)data);
640 status = HIVE_CORRUPT;
643 case REG_DWORD_BIG_ENDIAN:
636 if (data_size == 4) {
645 *value_ret = be32_to_cpu(*(be32 *)data);
648 status = HIVE_CORRUPT;
651 case REG_QWORD_LITTLE_ENDIAN:
/* A QWORD must be exactly 8 bytes long. */
652 if (data_size == 8) {
653 *value_ret = le64_to_cpu(*(le64 *)data);
656 status = HIVE_CORRUPT;
660 status = HIVE_VALUE_IS_WRONG_TYPE;
/* subkey_cb_t callback used by hive_list_subkeys().  It makes a tchar copy of
 * @sub_nk's name and stores it in the slot that @_next_subkey_p (a tchar ***)
 * currently points to. */
668 static enum hive_status
669 append_subkey_name(const struct nk *sub_nk, void *_next_subkey_p)
671 size_t name_size = le16_to_cpu(sub_nk->name_size);
673 tchar ***next_subkeyp = _next_subkey_p;
675 if (sub_nk->flags & NK_COMPRESSED_NAME) {
/* Flag set: each name byte is copied into its own tchar, with one extra
 * element allocated for the terminator. */
676 subkey = MALLOC((name_size + 1) * sizeof(tchar));
678 return HIVE_OUT_OF_MEMORY;
679 for (size_t i = 0; i < name_size; i++)
680 subkey[i] = sub_nk->name[i];
681 subkey[name_size] = '\0';
684 enum hive_status status;
/* Flag clear: the name is treated as UTF-16LE and converted with
 * utf16le_to_tstr(). */
686 status = translate_wimlib_error(
687 utf16le_to_tstr((utf16lechar *)sub_nk->name,
688 name_size, &subkey, &dummy));
689 if (status != HIVE_OK)
/* Store the new string in the caller's current array slot. */
693 **next_subkeyp = subkey;
698 /* List the subkeys of the specified registry key. */
/* On success *subkeys_ret receives an array of subkey name strings.  The array
 * is allocated with one slot more than nk->num_subkeys and zero-filled by
 * CALLOC, which supplies the NULL entry that hive_free_subkeys_list() stops
 * at. */
700 hive_list_subkeys(const struct regf *regf, const tchar *key_name,
701 tchar ***subkeys_ret)
703 enum hive_status status;
708 status = lookup_key(regf, key_name, &nk);
709 if (status != HIVE_OK)
/* Bound the count before it is used as an allocation size. */
712 if (le32_to_cpu(nk->num_subkeys) > MAX_SUBKEYS)
715 subkeys = CALLOC(le32_to_cpu(nk->num_subkeys) + 1, sizeof(subkeys[0]));
717 return HIVE_OUT_OF_MEMORY;
/* next_subkey is the write cursor that append_subkey_name() stores through. */
719 next_subkey = subkeys;
720 status = iterate_subkeys(regf, nk, append_subkey_name, &next_subkey);
/* The array is published to the caller only when the whole walk succeeded. */
721 if (status == HIVE_OK)
722 *subkeys_ret = subkeys;
724 hive_free_subkeys_list(subkeys);
/* Release a subkey list.  The loop runs until it reaches a NULL entry, so
 * @subkeys must be NULL-terminated; hive_list_subkeys() allocates one extra
 * zeroed slot for that purpose. */
729 hive_free_subkeys_list(tchar **subkeys)
731 for (tchar **p = subkeys; *p; p++)
/* Return a string literal naming @status.  Each visible case returns the
 * enumerator's identifier spelled out verbatim. */
737 hive_status_to_string(enum hive_status status)
743 return "HIVE_CORRUPT";
744 case HIVE_UNSUPPORTED:
745 return "HIVE_UNSUPPORTED";
746 case HIVE_KEY_NOT_FOUND:
747 return "HIVE_KEY_NOT_FOUND";
748 case HIVE_VALUE_NOT_FOUND:
749 return "HIVE_VALUE_NOT_FOUND";
750 case HIVE_VALUE_IS_WRONG_TYPE:
751 return "HIVE_VALUE_IS_WRONG_TYPE";
752 case HIVE_OUT_OF_MEMORY:
753 return "HIVE_OUT_OF_MEMORY";
754 case HIVE_ITERATION_STOPPED:
755 return "HIVE_ITERATION_STOPPED";