/*
 * Extract information from Windows NT registry hives.
 */

/*
 * Copyright (C) 2016 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */
#include "wimlib/encoding.h"
#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/registry.h"
#include "wimlib/util.h"
36 /* Registry hive file header */
38 #define REGF_MAGIC cpu_to_le32(0x66676572) /* "regf" */
41 #define REGF_MAJOR cpu_to_le32(1)
45 le32 root_key_offset; /* Offset, in hbin area, to root nk */
46 le32 total_hbin_size; /* Total size of all hbins */
48 u8 hbin_area[0]; /* Start of hbin area */
54 /* The cell size in bytes, negated for in-use cells */
57 /* Magic characters which identify the cell type */
61 /* NK cell - represents a registry key */
63 #define NK_MAGIC cpu_to_le16(0x6B6E) /* "nk" */
65 #define NK_COMPRESSED_NAME cpu_to_le16(0x0020)
72 le32 subkey_list_offset;
75 le32 value_list_offset;
89 /* Subkey list cell. There are four types. LF, LH, and LI cells reference
90 * subkey NK cells directly, while RI cells reference other subkey lists. All
91 * contain a count followed by that many 32-bit offsets. But LF and LH cells
92 * contain a 32-bit hash along with each offset, while LI and RI cells only
95 #define LF_MAGIC cpu_to_le16(0x666C) /* "lf" */
96 #define LH_MAGIC cpu_to_le16(0x686C) /* "lh" */
97 #define LI_MAGIC cpu_to_le16(0x696C) /* "li" */
98 #define RI_MAGIC cpu_to_le16(0x6972) /* "ri" */
104 /* Value list cell - contains a list of value references */
110 /* VK cell - contains a value's data, or a reference to it */
112 #define VK_MAGIC cpu_to_le16(0x6B76)
117 #define REG_NONE cpu_to_le32(0)
118 #define REG_SZ cpu_to_le32(1)
119 #define REG_EXPAND_SZ cpu_to_le32(2)
120 #define REG_BINARY cpu_to_le32(3)
121 #define REG_DWORD cpu_to_le32(4)
122 #define REG_DWORD_LITTLE_ENDIAN cpu_to_le32(4)
123 #define REG_DWORD_BIG_ENDIAN cpu_to_le32(5)
124 #define REG_LINK cpu_to_le32(6)
125 #define REG_MULTI_SZ cpu_to_le32(7)
126 #define REG_RESOURCE_LIST cpu_to_le32(8)
127 #define REG_FULL_RESOURCE_DESCRIPTOR cpu_to_le32(9)
128 #define REG_RESOURCE_REQUIREMENTS_LIST cpu_to_le32(10)
129 #define REG_QWORD cpu_to_le32(11)
130 #define REG_QWORD_LITTLE_ENDIAN cpu_to_le32(11)
132 #define VK_COMPRESSED_NAME cpu_to_le16(0x0001)
138 /* Data cell - contains a value's data */
/*
 * Arbitrary limits for safety.  Hives that exceed them are rejected instead of
 * being processed.
 */
#define MAX_VALUES		65536	/* Values per key */
#define MAX_VALUE_SIZE		1048576	/* Bytes of data per value */
#define MAX_SUBKEYS		65536	/* Subkeys per key */
#define MAX_SUBKEY_LIST_LEVELS	5	/* Nesting depth of subkey lists */
#define MAX_SUBKEY_LISTS	4096	/* Subkey lists visited per key */
151 static enum hive_status
152 translate_wimlib_error(int ret)
156 if (ret == WIMLIB_ERR_NOMEM)
157 return HIVE_OUT_OF_MEMORY;
158 return HIVE_UNSUPPORTED;
161 /* Compare a UTF-16LE name with a key or value name in the registry. The
162 * comparison is case insensitive. */
164 names_equal(const utf16lechar *name, size_t name_nchars,
165 const void *disk_name, size_t disk_name_size,
169 /* ISO-8859-1 (LATIN1) on-disk */
170 const u8 *p = disk_name;
171 if (disk_name_size != name_nchars)
173 for (size_t i = 0; i < name_nchars; i++)
174 if (upcase[le16_to_cpu(name[i])] != upcase[p[i]])
178 /* UTF-16LE on disk */
180 if (disk_name_size != name_nchars)
182 return !cmp_utf16le_strings(name, name_nchars,
183 disk_name, disk_name_size, true);
187 /* Get a pointer to a cell, with alignment and bounds checking. Returns NULL if
188 * the requested information does not specify a properly aligned, sized, and
191 get_cell_pointer(const struct regf *regf, le32 offset, size_t wanted_size)
193 u32 total = le32_to_cpu(regf->total_hbin_size);
194 u32 offs = le32_to_cpu(offset);
195 const struct cell *cell;
198 if ((offs > total) || (offs & 7) || (wanted_size > total - offs))
201 cell = (const struct cell *)®f->hbin_area[offs];
202 actual_size = -le32_to_cpu(cell->size);
203 if (actual_size > INT32_MAX) /* Cell unused, or size was INT32_MIN? */
205 if (wanted_size > actual_size) /* Cell too small? */
210 /* Revalidate the cell with its full length. Returns true iff the cell is
213 revalidate_cell(const struct regf *regf, le32 offset, size_t wanted_size)
215 return get_cell_pointer(regf, offset, wanted_size) != NULL;
218 struct subkey_iteration_stats {
220 /* The number of additional levels of descendent subkey lists that may
221 * be visited (currently, i.e. at this point in the iteration) before
222 * our safety limit of MAX_SUBKEY_LIST_LEVELS is reached */
223 u32 levels_remaining;
225 /* The number of additional subkey lists that may be visited until our
226 * safety limit of MAX_SUBKEY_LISTS is reached */
227 u32 subkey_lists_remaining;
229 /* The number of subkeys remaining to be found. Since the number of
230 * subkeys is known from the parent nk cell, this should be 0 at the end
231 * of the iteration. */
232 u32 subkeys_remaining;
/*
 * Callback invoked on each subkey nk cell, together with a caller-supplied
 * context pointer.  Returning anything other than HIVE_OK stops the iteration,
 * and that status is passed back to the caller of the iteration.
 */
typedef enum hive_status (*subkey_cb_t)(const struct nk *, void *);
237 static enum hive_status
238 iterate_subkeys_recursive(const struct regf *regf, le32 subkey_list_offset,
239 subkey_cb_t cb, void *cb_ctx,
240 struct subkey_iteration_stats *stats)
242 const struct subkey_list *list;
243 unsigned num_offsets;
247 enum hive_status status;
249 if (stats->levels_remaining == 0 || stats->subkey_lists_remaining == 0)
252 stats->subkey_lists_remaining--;
254 list = get_cell_pointer(regf, subkey_list_offset,
255 sizeof(struct subkey_list));
259 num_offsets = le16_to_cpu(list->num_offsets);
260 extra_size = num_offsets * sizeof(list->elements[0]);
263 if (list->base.magic == LF_MAGIC || list->base.magic == LH_MAGIC) {
264 /* Hashes are included */
269 if (!revalidate_cell(regf, subkey_list_offset,
270 sizeof(struct subkey_list) + extra_size))
275 switch (list->base.magic) {
279 /* Children are subkeys */
280 if (stats->subkeys_remaining < num_offsets)
282 stats->subkeys_remaining -= num_offsets;
283 while (num_offsets--) {
284 const struct nk *sub_nk;
286 sub_nk = get_cell_pointer(regf, *p, sizeof(struct nk));
287 if (!sub_nk || sub_nk->base.magic != NK_MAGIC)
290 if (!revalidate_cell(regf, *p, sizeof(struct nk) +
291 le16_to_cpu(sub_nk->name_size)))
294 status = (*cb)(sub_nk, cb_ctx);
295 if (status != HIVE_OK)
301 /* Children are subkey lists */
303 stats->levels_remaining--;
304 while (num_offsets--) {
305 status = iterate_subkeys_recursive(regf, *p++,
307 if (status != HIVE_OK)
310 stats->levels_remaining++;
313 return HIVE_UNSUPPORTED;
317 /* Call @cb on each subkey cell of the key @nk. */
318 static enum hive_status
319 iterate_subkeys(const struct regf *regf, const struct nk *nk,
320 subkey_cb_t cb, void *cb_ctx)
322 u32 num_subkeys = le32_to_cpu(nk->num_subkeys);
323 struct subkey_iteration_stats stats;
324 enum hive_status status;
326 if (num_subkeys == 0)
329 if (num_subkeys > MAX_SUBKEYS)
332 stats.levels_remaining = MAX_SUBKEY_LIST_LEVELS;
333 stats.subkey_lists_remaining = MAX_SUBKEY_LISTS;
334 stats.subkeys_remaining = num_subkeys;
336 status = iterate_subkeys_recursive(regf, nk->subkey_list_offset,
338 if (stats.subkeys_remaining != 0 && status == HIVE_OK)
339 status = HIVE_CORRUPT;
343 struct lookup_subkey_ctx {
344 const utf16lechar *key_name;
345 size_t key_name_nchars;
346 const struct nk *result;
349 static enum hive_status
350 lookup_subkey_cb(const struct nk *sub_nk, void *_ctx)
352 struct lookup_subkey_ctx *ctx = _ctx;
354 if (names_equal(ctx->key_name, ctx->key_name_nchars,
355 sub_nk->name, le16_to_cpu(sub_nk->name_size),
356 (sub_nk->flags & NK_COMPRESSED_NAME) != 0))
358 ctx->result = sub_nk;
359 return HIVE_ITERATION_STOPPED;
366 * Given a registry key cell @nk, look up the next component of the key
367 * *key_namep. If found, return HIVE_OK, advance *key_namep past the key name
368 * component, and return the subkey cell in @sub_nk_ret. Otherwise, return
369 * another HIVE_* error code.
371 static enum hive_status
372 lookup_subkey(const struct regf *regf, const utf16lechar **key_namep,
373 const struct nk *nk, const struct nk **sub_nk_ret)
375 const utf16lechar *key_name = *key_namep;
376 size_t key_name_nchars = 0;
377 struct lookup_subkey_ctx ctx;
378 enum hive_status status;
380 while (key_name[key_name_nchars] != cpu_to_le16('\0') &&
381 key_name[key_name_nchars] != cpu_to_le16('\\'))
384 ctx.key_name = key_name;
385 ctx.key_name_nchars = key_name_nchars;
388 status = iterate_subkeys(regf, nk, lookup_subkey_cb, &ctx);
390 if (status == HIVE_OK)
391 status = HIVE_KEY_NOT_FOUND;
395 key_name += key_name_nchars;
396 while (*key_name == cpu_to_le16('\\'))
398 *key_namep = key_name;
399 *sub_nk_ret = ctx.result;
403 /* Find the nk cell for the key named @key_name in the registry hive @regf. */
404 static enum hive_status
405 lookup_key(const struct regf *regf, const tchar *key_name,
406 const struct nk **nk_ret)
409 enum hive_status status;
410 const utf16lechar *key_uname, *key_unamep;
412 nk = get_cell_pointer(regf, regf->root_key_offset, sizeof(struct nk));
413 if (!nk || nk->base.magic != NK_MAGIC)
416 status = translate_wimlib_error(tstr_get_utf16le(key_name, &key_uname));
417 if (status != HIVE_OK)
419 key_unamep = key_uname;
420 while (*key_unamep) {
421 status = lookup_subkey(regf, &key_unamep, nk, &nk);
422 if (status != HIVE_OK)
428 tstr_put_utf16le(key_uname);
432 /* Find the vk cell for the value named @value_name of the key named @key_name
433 * in the registry hive @regf. */
434 static enum hive_status
435 lookup_value(const struct regf *regf, const tchar *key_name,
436 const tchar *value_name, const struct vk **vk_ret)
438 enum hive_status status;
441 const struct value_list *value_list;
442 const utf16lechar *value_uname;
443 size_t value_uname_nchars;
445 /* Look up the nk cell for the key. */
446 status = lookup_key(regf, key_name, &nk);
447 if (status != HIVE_OK)
450 num_values = le32_to_cpu(nk->num_values);
452 if (num_values == 0) /* No values? */
453 return HIVE_VALUE_NOT_FOUND;
455 if (num_values > MAX_VALUES)
458 value_list = get_cell_pointer(regf, nk->value_list_offset,
459 sizeof(struct value_list) +
461 sizeof(value_list->vk_offsets[0])));
465 /* Look for the value in the value list. */
467 status = translate_wimlib_error(
468 tstr_get_utf16le_and_len(value_name, &value_uname,
469 &value_uname_nchars));
470 if (status != HIVE_OK)
472 value_uname_nchars /= 2;
474 for (size_t i = 0; i < num_values; i++) {
478 status = HIVE_CORRUPT;
479 vk = get_cell_pointer(regf, value_list->vk_offsets[i],
481 if (!vk || vk->base.magic != VK_MAGIC)
484 name_size = le16_to_cpu(vk->name_size);
486 if (!revalidate_cell(regf, value_list->vk_offsets[i],
487 sizeof(struct vk) + name_size))
490 if (names_equal(value_uname, value_uname_nchars,
492 (vk->flags & VK_COMPRESSED_NAME) != 0))
500 status = HIVE_VALUE_NOT_FOUND;
502 tstr_put_utf16le(value_uname);
507 * Retrieve the data of the value named @value_name of the key named @key_name
508 * in the registry hive @regf. If the value was found, return HIVE_OK and
509 * return the data, its size, and its type in @data_ret, @data_size_ret, and
510 * @data_type_ret. Otherwise, return another HIVE_* error code.
512 static enum hive_status
513 retrieve_value(const struct regf *regf, const tchar *key_name,
514 const tchar *value_name, void **data_ret,
515 size_t *data_size_ret, le32 *data_type_ret)
517 enum hive_status status;
523 /* Find the vk cell. */
524 status = lookup_value(regf, key_name, value_name, &vk);
525 if (status != HIVE_OK)
528 /* Extract the value data from the vk cell (for inline data) or from the
529 * data cell which it references (for non-inline data). */
531 data_size = le32_to_cpu(vk->data_size);
533 is_inline = (data_size & 0x80000000);
534 data_size &= 0x7FFFFFFF;
536 if (data_size > MAX_VALUE_SIZE)
542 data = &vk->data_offset;
544 const struct data_cell *data_cell;
546 data_cell = get_cell_pointer(regf, vk->data_offset,
547 sizeof(struct data_cell));
551 if (!revalidate_cell(regf, vk->data_offset,
552 sizeof(struct data_cell) + data_size))
553 return HIVE_UNSUPPORTED; /* Possibly a big data cell */
555 data = data_cell->data;
558 *data_ret = memdup(data, data_size);
560 return HIVE_OUT_OF_MEMORY;
561 *data_size_ret = data_size;
562 *data_type_ret = vk->data_type;
566 /* Validate the registry hive file given in memory as @hive_mem and @hive_size.
567 * If valid, return HIVE_OK. If invalid, return another HIVE_* error code. */
569 hive_validate(const void *hive_mem, size_t hive_size)
571 const struct regf *regf = hive_mem;
573 STATIC_ASSERT(sizeof(struct regf) == 4096);
575 if (hive_size < sizeof(struct regf))
578 if (regf->magic != REGF_MAGIC || regf->major_version != REGF_MAJOR)
579 return HIVE_UNSUPPORTED;
581 if (le32_to_cpu(regf->total_hbin_size) > hive_size - sizeof(struct regf))
587 /* Get a string value from the registry hive file. */
589 hive_get_string(const struct regf *regf, const tchar *key_name,
590 const tchar *value_name, tchar **value_ret)
595 enum hive_status status;
597 /* Retrieve the raw value data. */
598 status = retrieve_value(regf, key_name, value_name,
599 &data, &data_size, &data_type);
600 if (status != HIVE_OK)
603 /* Interpret the data as a string, when possible. */
607 status = translate_wimlib_error(
608 utf16le_to_tstr(data, data_size, value_ret, &data_size));
611 status = HIVE_VALUE_IS_WRONG_TYPE;
618 /* Get a number value from the registry hive file. */
620 hive_get_number(const struct regf *regf, const tchar *key_name,
621 const tchar *value_name, s64 *value_ret)
626 enum hive_status status;
628 /* Retrieve the raw value data. */
629 status = retrieve_value(regf, key_name, value_name,
630 &data, &data_size, &data_type);
631 if (status != HIVE_OK)
634 /* Interpret the data as a number, when possible. */
636 case REG_DWORD_LITTLE_ENDIAN:
637 if (data_size == 4) {
638 *value_ret = le32_to_cpu(*(le32 *)data);
641 status = HIVE_CORRUPT;
644 case REG_DWORD_BIG_ENDIAN:
645 if (data_size == 4) {
646 *value_ret = be32_to_cpu(*(be32 *)data);
649 status = HIVE_CORRUPT;
652 case REG_QWORD_LITTLE_ENDIAN:
653 if (data_size == 8) {
654 *value_ret = le64_to_cpu(*(le64 *)data);
657 status = HIVE_CORRUPT;
661 status = HIVE_VALUE_IS_WRONG_TYPE;
669 static enum hive_status
670 append_subkey_name(const struct nk *sub_nk, void *_next_subkey_p)
672 size_t name_size = le16_to_cpu(sub_nk->name_size);
674 tchar ***next_subkeyp = _next_subkey_p;
676 if (sub_nk->flags & NK_COMPRESSED_NAME) {
677 subkey = MALLOC((name_size + 1) * sizeof(tchar));
679 return HIVE_OUT_OF_MEMORY;
680 for (size_t i = 0; i < name_size; i++)
681 subkey[i] = sub_nk->name[i];
682 subkey[name_size] = '\0';
684 enum hive_status status;
686 status = translate_wimlib_error(
687 utf16le_to_tstr((utf16lechar *)sub_nk->name,
688 name_size, &subkey, NULL));
689 if (status != HIVE_OK)
693 **next_subkeyp = subkey;
698 /* List the subkeys of the specified registry key. */
700 hive_list_subkeys(const struct regf *regf, const tchar *key_name,
701 tchar ***subkeys_ret)
703 enum hive_status status;
708 status = lookup_key(regf, key_name, &nk);
709 if (status != HIVE_OK)
712 if (le32_to_cpu(nk->num_subkeys) > MAX_SUBKEYS)
715 subkeys = CALLOC(le32_to_cpu(nk->num_subkeys) + 1, sizeof(subkeys[0]));
717 return HIVE_OUT_OF_MEMORY;
719 next_subkey = subkeys;
720 status = iterate_subkeys(regf, nk, append_subkey_name, &next_subkey);
721 if (status == HIVE_OK)
722 *subkeys_ret = subkeys;
724 hive_free_subkeys_list(subkeys);
729 hive_free_subkeys_list(tchar **subkeys)
731 for (tchar **p = subkeys; *p; p++)
737 hive_status_to_string(enum hive_status status)
743 return "HIVE_CORRUPT";
744 case HIVE_UNSUPPORTED:
745 return "HIVE_UNSUPPORTED";
746 case HIVE_KEY_NOT_FOUND:
747 return "HIVE_KEY_NOT_FOUND";
748 case HIVE_VALUE_NOT_FOUND:
749 return "HIVE_VALUE_NOT_FOUND";
750 case HIVE_VALUE_IS_WRONG_TYPE:
751 return "HIVE_VALUE_IS_WRONG_TYPE";
752 case HIVE_OUT_OF_MEMORY:
753 return "HIVE_OUT_OF_MEMORY";
754 case HIVE_ITERATION_STOPPED:
755 return "HIVE_ITERATION_STOPPED";