wimlib.net Git - wimlib/blobdiff - src/registry.c
Implement setting of Windows-specific XML information
[wimlib] / src / registry.c
diff --git a/src/registry.c b/src/registry.c
new file mode 100644 (file)
index 0000000..02ab8c0
--- /dev/null
@@ -0,0 +1,677 @@
+/*
+ * registry.c
+ *
+ * Extract information from Windows NT registry hives.
+ */
+
+/*
+ * Copyright (C) 2016 Eric Biggers
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <string.h>
+
+#include "wimlib/encoding.h"
+#include "wimlib/endianness.h"
+#include "wimlib/error.h"
+#include "wimlib/registry.h"
+#include "wimlib/util.h"
+
+/* Registry hive file header.
+ *
+ * The members below total 4096 bytes (hive_validate() asserts this), so
+ * hbin_area starts 4096 bytes into the hive.  The le32 members hold
+ * little-endian values.  f1, f2, and f3 cover header bytes that this file does
+ * not interpret.  */
+struct regf {
+#define REGF_MAGIC             cpu_to_le32(0x66676572) /* "regf" */
+       le32 magic;                     /* Checked against REGF_MAGIC  */
+       le32 f1[4];
+#define REGF_MAJOR             cpu_to_le32(1)
+       le32 major_version;             /* Checked against REGF_MAJOR  */
+       le32 minor_version;             /* Not checked by this file  */
+       le32 f2[2];
+       le32 root_key_offset;           /* Offset, in hbin area, to root nk  */
+       le32 total_hbin_size;           /* Total size of all hbins  */
+       le32 f3[1013];
+       u8 hbin_area[0];                /* Start of hbin area  */
+} _packed_attribute;
+
+
+/* Cell header.  The nk, lf/lh, and vk cell structures in this file begin with
+ * it; the value list and data cell structures carry only the size field.  */
+struct cell {
+       /* The cell size in bytes, negated for in-use cells.  get_cell_pointer()
+        * returns NULL for a cell whose stored size is not negative.  */
+       le32 size;
+
+       /* Magic characters which identify the cell type  */
+       le16 magic;
+} _packed_attribute;
+
+/* NK cell - represents a registry key.
+ *
+ * The unknown_0xNN members are not interpreted by this file; NN is the member's
+ * byte offset from the start of the cell.  */
+struct nk {
+#define NK_MAGIC               cpu_to_le16(0x6B6E)     /* "nk" */
+       struct cell base;
+#define NK_COMPRESSED_NAME     cpu_to_le16(0x0020)
+       le16 flags;             /* NK_COMPRESSED_NAME: name is 1 byte per char  */
+       le64 unknown_0x08;
+       le32 unknown_0x10;
+       le32 parent_offset;     /* Not read by this file  */
+       le32 num_subkeys;       /* Number of subkeys  */
+       le32 unknown_0x1C;
+       le32 subkey_list_offset; /* Offset, in hbin area, to subkey list cell  */
+       le32 unknown_0x24;
+       le32 num_values;        /* Number of values  */
+       le32 value_list_offset; /* Offset, in hbin area, to value list cell  */
+       le32 unknown_0x30;
+       le32 unknown_0x34;
+       le16 unknown_0x38;
+       le16 unknown_0x3A;
+       le32 unknown_0x3C;
+       le32 unknown_0x40;
+       le32 unknown_0x44;
+       le32 unknown_0x48;
+       le16 name_size;         /* Size of name in bytes  */
+       le16 unknown_0x4E;
+       char name[0];           /* Latin-1 if NK_COMPRESSED_NAME, else UTF-16LE  */
+} _packed_attribute;
+
+/* LF (or LH) cell - contains a list of subkey references.  LF and LH cells are
+ * the same except they use different hashing algorithms.  But this
+ * implementation doesn't yet make use of the hashes anyway.  */
+struct lf {
+#define LF_MAGIC       cpu_to_le16(0x666C)     /* "lf" */
+#define LH_MAGIC       cpu_to_le16(0x686C)     /* "lh" */
+       struct cell base;
+       le16 num_subkeys;       /* Not read here; the nk cell's count is used  */
+       struct {
+               le32 offset;            /* Offset, in hbin area, to subkey's nk  */
+               le32 subkey_name_hash;  /* Unused by this implementation  */
+       } subkeys[0];
+} _packed_attribute;
+
+/* Value list cell - contains a list of value references.  Unlike the cells
+ * built on struct cell, it has no magic field after the size.  */
+struct value_list {
+       le32 size;              /* Cell size, as in struct cell  */
+       le32 vk_offsets[0];     /* Offsets, in hbin area, to the vk cells  */
+} _packed_attribute;
+
+/* VK cell - contains a value's data, or a reference to it.
+ *
+ * Like the other on-disk structures in this file, this one is packed so that
+ * the compiler assumes nothing about the alignment of the hive memory it
+ * overlays.  Packing does not move any member: the members already sit at
+ * their natural offsets behind the 6-byte cell header.  */
+struct vk {
+       struct cell base;
+       le16 name_size;         /* Size of name in bytes  */
+
+       /* Size of the data in bytes in the low 31 bits.  If the high bit is
+        * set, the data is stored inline in data_offset.  */
+       le32 data_size;
+
+       /* Offset, in hbin area, to the data cell; or the data itself (at most
+        * 4 bytes) when it is stored inline.  */
+       le32 data_offset;
+#define REG_NONE                       cpu_to_le32(0x00000000)
+#define REG_SZ                         cpu_to_le32(0x00000001)
+#define REG_EXPAND_SZ                  cpu_to_le32(0x00000002)
+#define REG_BINARY                     cpu_to_le32(0x00000003)
+#define REG_DWORD                      cpu_to_le32(0x00000004)
+#define REG_DWORD_LITTLE_ENDIAN                cpu_to_le32(0x00000004)
+#define REG_DWORD_BIG_ENDIAN           cpu_to_le32(0x00000005)
+#define REG_LINK                       cpu_to_le32(0x00000006)
+#define REG_MULTI_SZ                   cpu_to_le32(0x00000007)
+#define REG_RESOURCE_LIST              cpu_to_le32(0x00000008)
+#define REG_FULL_RESOURCE_DESCRIPTOR   cpu_to_le32(0x00000009)
+#define REG_RESOURCE_REQUIREMENTS_LIST cpu_to_le32(0x0000000A)
+#define REG_QWORD                      cpu_to_le32(0x0000000B)
+#define REG_QWORD_LITTLE_ENDIAN                cpu_to_le32(0x0000000B)
+       le32 data_type;         /* One of the REG_* constants above  */
+#define VK_COMPRESSED_NAME             cpu_to_le16(0x0001)
+       le16 flags;             /* VK_COMPRESSED_NAME: name is 1 byte per char  */
+       le16 unknown_0x16;
+       char name[0];           /* Latin-1 if VK_COMPRESSED_NAME, else UTF-16LE  */
+} _packed_attribute;
+
+/* Data cell - contains a value's data.  It has no magic field after the size.
+ * Packed, like the other on-disk structures in this file, so that no alignment
+ * is assumed for the hive memory it overlays.  */
+struct data_cell {
+       le32 size;              /* Cell size, as in struct cell  */
+       u8 data[0];             /* The value's data  */
+} _packed_attribute;
+
+static enum hive_status
+translate_wimlib_error(int ret)
+{
+       if (likely(!ret))
+               return HIVE_OK;
+       if (ret == WIMLIB_ERR_NOMEM)
+               return HIVE_OUT_OF_MEMORY;
+       return HIVE_UNSUPPORTED;
+}
+
+/* Case-insensitively test whether the UTF-16LE string @name, @name_nchars
+ * characters long, equals the on-disk key or value name @disk_name, which is
+ * @disk_name_size bytes long.  @compressed tells whether the on-disk name uses
+ * one byte per character instead of UTF-16LE.  */
+static inline bool
+names_equal(const utf16lechar *name, size_t name_nchars,
+           const void *disk_name, size_t disk_name_size,
+           bool compressed)
+{
+       const u8 *latin1;
+       size_t disk_nchars;
+
+       if (!compressed) {
+               /* UTF-16LE on disk: two bytes per character  */
+               disk_nchars = disk_name_size / 2;
+               if (disk_nchars != name_nchars)
+                       return false;
+               return cmp_utf16le_strings(name, name_nchars,
+                                          disk_name, disk_nchars, true) == 0;
+       }
+
+       /* ISO-8859-1 (LATIN1) on disk: one byte per character  */
+       if (disk_name_size != name_nchars)
+               return false;
+
+       latin1 = disk_name;
+       for (size_t pos = 0; pos != name_nchars; pos++) {
+               if (upcase[le16_to_cpu(name[pos])] != upcase[latin1[pos]])
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * Get a pointer to a cell, with alignment and bounds checking.
+ *
+ * @regf: the hive; regf->total_hbin_size bounds the offsets that are accepted.
+ * @offset: little-endian offset of the cell from the start of the hbin area.
+ * @wanted_size: number of bytes, counted from the start of the cell, that the
+ *     caller intends to access.
+ *
+ * Returns NULL if the requested information does not specify a properly
+ * aligned, sized, and in-use cell.
+ */
+static const void *
+get_cell_pointer(const struct regf *regf, le32 offset, size_t wanted_size)
+{
+       u32 total = le32_to_cpu(regf->total_hbin_size);
+       u32 offs = le32_to_cpu(offset);
+       const struct cell *cell;
+       u32 raw_size;
+       u32 cell_size;
+
+       if ((offs > total) || (offs & 7) || (wanted_size > total - offs))
+               return NULL;
+
+       cell = (const struct cell *)&regf->hbin_area[offs];
+
+       /* The size field itself must be in bounds before it is read.  */
+       if (total - offs < sizeof(cell->size))
+               return NULL;
+
+       /* An in-use cell stores its size negated, i.e. with the sign bit set.
+        * The value is kept unsigned so that recovering the magnitude cannot
+        * overflow, even if the hive stores the most negative 32-bit value.  */
+       raw_size = le32_to_cpu(cell->size);
+       if (!(raw_size & 0x80000000)) /* Cell not in use?  */
+               return NULL;
+
+       cell_size = (u32)0 - raw_size;
+       if (wanted_size > cell_size) /* Cell too small?  */
+               return NULL;
+       return cell;
+}
+
+/* Check the cell at @offset once more, this time requiring @wanted_size bytes
+ * of it (typically its full length, once that is known).  Returns true iff
+ * get_cell_pointer() accepts the cell.  */
+static bool
+revalidate_cell(const struct regf *regf, le32 offset, size_t wanted_size)
+{
+       const void *cell = get_cell_pointer(regf, offset, wanted_size);
+
+       return cell ? true : false;
+}
+
+/*
+ * Given a registry key cell @nk, look up the next component of the key
+ * *key_namep among the subkeys of @nk.  A component ends at a backslash or at
+ * the end of the string, and names are compared case-insensitively.
+ *
+ * If found, return HIVE_OK, advance *key_namep past the component and any
+ * backslashes following it, and return the subkey cell in @sub_nk_ret.
+ * Otherwise return HIVE_KEY_NOT_FOUND if no subkey has that name, HIVE_CORRUPT
+ * if a needed cell is invalid or is not of the expected type, or
+ * HIVE_UNSUPPORTED if the subkey list is neither an LF nor an LH cell.
+ */
+static enum hive_status
+lookup_subkey(const struct regf *regf, const utf16lechar **key_namep,
+             const struct nk *nk, const struct nk **sub_nk_ret)
+{
+       const utf16lechar *key_name = *key_namep;
+       size_t key_name_nchars = 0;
+       size_t num_subkeys;
+       const struct cell *subkey_list;
+       const struct lf *lf;
+
+       /* Measure the component to look up.  */
+       while (key_name[key_name_nchars] &&
+              key_name[key_name_nchars] != cpu_to_le16('\\'))
+               key_name_nchars++;
+
+       num_subkeys = le32_to_cpu(nk->num_subkeys);
+
+       if (num_subkeys == 0) /* No subkeys?  */
+               return HIVE_KEY_NOT_FOUND;
+
+       if (num_subkeys > 65536) /* Arbitrary limit */
+               return HIVE_CORRUPT;
+
+       /* Find the subkey list cell.  Only the generic cell header is
+        * validated at first, since the list type is not known yet.  */
+       subkey_list = get_cell_pointer(regf, nk->subkey_list_offset,
+                                      sizeof(struct cell));
+       if (!subkey_list)
+               return HIVE_CORRUPT;
+
+       /* Only LF and LH subkey lists are handled.  */
+       if (subkey_list->magic != LF_MAGIC && subkey_list->magic != LH_MAGIC)
+               return HIVE_UNSUPPORTED;
+
+       /* Validate the list again, now with room for every entry.  */
+       lf = get_cell_pointer(regf, nk->subkey_list_offset,
+                             sizeof(struct lf) +
+                             (num_subkeys * sizeof(lf->subkeys[0])));
+       if (!lf)
+               return HIVE_CORRUPT;
+
+       /* Look for the subkey in the subkey list.  */
+       for (size_t i = 0; i < num_subkeys; i++) {
+               const struct nk *sub_nk;
+               size_t name_size;
+
+               sub_nk = get_cell_pointer(regf, lf->subkeys[i].offset,
+                                         sizeof(struct nk));
+               if (!sub_nk)
+                       return HIVE_CORRUPT;
+
+               /* A list entry which does not point to an nk cell must not be
+                * interpreted as a key.  */
+               if (sub_nk->base.magic != NK_MAGIC)
+                       return HIVE_CORRUPT;
+
+               name_size = le16_to_cpu(sub_nk->name_size);
+
+               /* The name follows the fixed part of the cell, so the cell
+                * must be large enough for both.  */
+               if (!revalidate_cell(regf, lf->subkeys[i].offset,
+                                    sizeof(struct nk) + name_size))
+                       return HIVE_CORRUPT;
+
+               if (!names_equal(key_name, key_name_nchars,
+                                sub_nk->name, name_size,
+                                (sub_nk->flags & NK_COMPRESSED_NAME)))
+                       continue;
+
+               /* Found it.  Skip the component and its separator(s).  */
+               key_name += key_name_nchars;
+               while (*key_name == cpu_to_le16('\\'))
+                       key_name++;
+               *key_namep = key_name;
+               *sub_nk_ret = sub_nk;
+               return HIVE_OK;
+       }
+
+       return HIVE_KEY_NOT_FOUND;
+}
+
+/*
+ * Find the nk cell for the key named @key_name in the registry hive @regf.
+ *
+ * The lookup starts at the hive's root key and descends one component of
+ * @key_name at a time; an empty @key_name yields the root key itself.
+ *
+ * On success, return HIVE_OK and store a pointer to the cell, which lies
+ * inside the hive memory, in @nk_ret.  Otherwise return HIVE_CORRUPT if the
+ * root cell is invalid or is not an nk cell, the status of a failed name
+ * conversion, or the status of the first lookup_subkey() call that fails.
+ */
+static enum hive_status
+lookup_key(const struct regf *regf, const tchar *key_name,
+          const struct nk **nk_ret)
+{
+       const struct nk *nk;
+       enum hive_status status;
+       const utf16lechar *key_uname, *key_unamep;
+
+       nk = get_cell_pointer(regf, regf->root_key_offset, sizeof(struct nk));
+       if (!nk)
+               return HIVE_CORRUPT;
+
+       /* The root offset must really lead to a key cell.  */
+       if (nk->base.magic != NK_MAGIC)
+               return HIVE_CORRUPT;
+
+       status = translate_wimlib_error(tstr_get_utf16le(key_name, &key_uname));
+       if (status != HIVE_OK)
+               return status;
+
+       /* Each successful lookup_subkey() call consumes one component of the
+        * name and replaces @nk with the matching subkey.  */
+       key_unamep = key_uname;
+       while (*key_unamep) {
+               status = lookup_subkey(regf, &key_unamep, nk, &nk);
+               if (status != HIVE_OK)
+                       goto out;
+       }
+       *nk_ret = nk;
+       status = HIVE_OK;
+out:
+       tstr_put_utf16le(key_uname);
+       return status;
+}
+
+/*
+ * Locate the vk cell of the value named @value_name which belongs to the key
+ * named @key_name in the registry hive @regf.
+ *
+ * On success, return HIVE_OK and store a pointer to the cell in @vk_ret.
+ * Otherwise return HIVE_VALUE_NOT_FOUND if the key has no value with that name,
+ * HIVE_CORRUPT if the value count is implausible or a needed cell is invalid,
+ * or the status of a failed key lookup or name conversion.
+ */
+static enum hive_status
+lookup_value(const struct regf *regf, const tchar *key_name,
+            const tchar *value_name, const struct vk **vk_ret)
+{
+       const struct nk *key;
+       const struct value_list *list;
+       const utf16lechar *uname;
+       size_t uname_len;
+       size_t count;
+       enum hive_status result;
+
+       /* Start from the nk cell of the key.  */
+       result = lookup_key(regf, key_name, &key);
+       if (result != HIVE_OK)
+               return result;
+
+       count = le32_to_cpu(key->num_values);
+       if (count == 0) /* Key has no values at all  */
+               return HIVE_VALUE_NOT_FOUND;
+       if (count > 65536) /* Arbitrary limit */
+               return HIVE_CORRUPT;
+
+       /* The value list must have room for every entry.  */
+       list = get_cell_pointer(regf, key->value_list_offset,
+                               sizeof(struct value_list) +
+                               (count * sizeof(list->vk_offsets[0])));
+       if (!list)
+               return HIVE_CORRUPT;
+
+       /* Convert the wanted name; the length comes back in bytes.  */
+       result = translate_wimlib_error(
+                       tstr_get_utf16le_and_len(value_name, &uname,
+                                                &uname_len));
+       if (result != HIVE_OK)
+               return result;
+       uname_len /= 2;
+
+       /* Walk the value list until the name matches or a cell is bad.  */
+       result = HIVE_VALUE_NOT_FOUND;
+       for (size_t idx = 0; idx < count; idx++) {
+               le32 vk_offset = list->vk_offsets[idx];
+               const struct vk *candidate;
+               size_t name_size;
+
+               candidate = get_cell_pointer(regf, vk_offset,
+                                            sizeof(struct vk));
+               if (!candidate) {
+                       result = HIVE_CORRUPT;
+                       break;
+               }
+
+               /* The name follows the fixed part of the cell.  */
+               name_size = le16_to_cpu(candidate->name_size);
+               if (!revalidate_cell(regf, vk_offset,
+                                    sizeof(struct vk) + name_size)) {
+                       result = HIVE_CORRUPT;
+                       break;
+               }
+
+               if (names_equal(uname, uname_len, candidate->name, name_size,
+                               (candidate->flags & VK_COMPRESSED_NAME))) {
+                       *vk_ret = candidate;
+                       result = HIVE_OK;
+                       break;
+               }
+       }
+
+       tstr_put_utf16le(uname);
+       return result;
+}
+
+/*
+ * Fetch the data of the value named @value_name which belongs to the key named
+ * @key_name in the registry hive @regf.
+ *
+ * On success, return HIVE_OK; a copy of the data made with memdup() is stored
+ * in @data_ret, its size in bytes in @data_size_ret, and its type (one of the
+ * REG_* constants, still little endian) in @data_type_ret.  Otherwise, return
+ * another HIVE_* error code.
+ */
+static enum hive_status
+retrieve_value(const struct regf *regf, const tchar *key_name,
+              const tchar *value_name, void **data_ret,
+              size_t *data_size_ret, le32 *data_type_ret)
+{
+       const struct vk *vk;
+       const void *src;
+       u32 raw_size;
+       size_t nbytes;
+       enum hive_status status;
+
+       /* Locate the vk cell first.  */
+       status = lookup_value(regf, key_name, value_name, &vk);
+       if (status != HIVE_OK)
+               return status;
+
+       /* The high bit of the size field flags inline data; the remaining bits
+        * are the data size in bytes.  */
+       raw_size = le32_to_cpu(vk->data_size);
+       nbytes = raw_size & 0x7FFFFFFF;
+
+       if (nbytes > 1048576)   /* Arbitrary limit */
+               return HIVE_CORRUPT;
+
+       if (raw_size & 0x80000000) {
+               /* Inline: the offset field holds the data itself.  */
+               if (nbytes > 4)
+                       return HIVE_CORRUPT;
+               src = &vk->data_offset;
+       } else {
+               /* Not inline: the offset field refers to a data cell.  */
+               const struct data_cell *data_cell =
+                       get_cell_pointer(regf, vk->data_offset,
+                                        sizeof(struct data_cell));
+
+               if (!data_cell)
+                       return HIVE_CORRUPT;
+
+               if (!revalidate_cell(regf, vk->data_offset,
+                                    sizeof(struct data_cell) + nbytes))
+                       return HIVE_UNSUPPORTED; /* Possibly a big data cell  */
+
+               src = data_cell->data;
+       }
+
+       *data_ret = memdup(src, nbytes);
+       if (!*data_ret)
+               return HIVE_OUT_OF_MEMORY;
+       *data_type_ret = vk->data_type;
+       *data_size_ret = nbytes;
+       return HIVE_OK;
+}
+
+/*
+ * Check that the @hive_size bytes of memory at @hive_mem look like a registry
+ * hive file this code can read.
+ *
+ * Returns HIVE_OK if they do, HIVE_CORRUPT if the buffer is smaller than the
+ * header or than the header plus the hbin area the header announces, and
+ * HIVE_UNSUPPORTED if the magic or the major version is not the expected one.
+ */
+enum hive_status
+hive_validate(const void *hive_mem, size_t hive_size)
+{
+       const struct regf *regf = hive_mem;
+       size_t max_hbin_size;
+
+       STATIC_ASSERT(sizeof(struct regf) == 4096);
+
+       /* The header must be complete before any of it is read.  */
+       if (hive_size < sizeof(struct regf))
+               return HIVE_CORRUPT;
+       max_hbin_size = hive_size - sizeof(struct regf);
+
+       if (!(regf->magic == REGF_MAGIC && regf->major_version == REGF_MAJOR))
+               return HIVE_UNSUPPORTED;
+
+       /* The hbin area must fit in what follows the header.  */
+       return (le32_to_cpu(regf->total_hbin_size) <= max_hbin_size) ?
+               HIVE_OK : HIVE_CORRUPT;
+}
+
+/*
+ * Get a string value from the registry hive file.
+ *
+ * The value named @value_name of the key named @key_name is retrieved and, if
+ * its type is REG_SZ or REG_MULTI_SZ, converted with utf16le_to_tstr(), which
+ * stores the result in @value_ret.  Returns HIVE_VALUE_IS_WRONG_TYPE for any
+ * other type, and otherwise the status of the retrieval or of the conversion.
+ */
+enum hive_status
+hive_get_string(const struct regf *regf, const tchar *key_name,
+               const tchar *value_name, tchar **value_ret)
+{
+       void *raw;
+       size_t raw_size;
+       size_t converted_size;
+       le32 type;
+       enum hive_status result;
+
+       /* Fetch a copy of the raw value data.  */
+       result = retrieve_value(regf, key_name, value_name,
+                               &raw, &raw_size, &type);
+       if (result != HIVE_OK)
+               return result;
+
+       /* Only the string types can be interpreted as a string.  */
+       if (type == REG_SZ || type == REG_MULTI_SZ) {
+               result = translate_wimlib_error(
+                       utf16le_to_tstr(raw, raw_size, value_ret,
+                                       &converted_size));
+       } else {
+               result = HIVE_VALUE_IS_WRONG_TYPE;
+       }
+
+       FREE(raw);
+       return result;
+}
+
+/*
+ * Get a number value from the registry hive file.
+ *
+ * The value named @value_name of the key named @key_name is retrieved and
+ * decoded according to its type: 32-bit little endian, 32-bit big endian, or
+ * 64-bit little endian.  On success, return HIVE_OK and store the number in
+ * @value_ret.  Returns HIVE_CORRUPT if the data size does not match the type,
+ * HIVE_VALUE_IS_WRONG_TYPE for any other type, or the status of a failed
+ * retrieval.
+ */
+enum hive_status
+hive_get_number(const struct regf *regf, const tchar *key_name,
+               const tchar *value_name, s64 *value_ret)
+{
+       void *raw;
+       size_t raw_size;
+       le32 type;
+       enum hive_status result;
+
+       /* Fetch a copy of the raw value data.  */
+       result = retrieve_value(regf, key_name, value_name,
+                               &raw, &raw_size, &type);
+       if (result != HIVE_OK)
+               return result;
+
+       /* A numeric type whose data has the wrong size counts as corrupt.  */
+       result = HIVE_CORRUPT;
+       if (type == REG_DWORD_LITTLE_ENDIAN) {
+               if (raw_size == 4) {
+                       *value_ret = le32_to_cpu(*(le32 *)raw);
+                       result = HIVE_OK;
+               }
+       } else if (type == REG_DWORD_BIG_ENDIAN) {
+               if (raw_size == 4) {
+                       *value_ret = be32_to_cpu(*(be32 *)raw);
+                       result = HIVE_OK;
+               }
+       } else if (type == REG_QWORD_LITTLE_ENDIAN) {
+               if (raw_size == 8) {
+                       *value_ret = le64_to_cpu(*(le64 *)raw);
+                       result = HIVE_OK;
+               }
+       } else {
+               result = HIVE_VALUE_IS_WRONG_TYPE;
+       }
+
+       FREE(raw);
+       return result;
+}
+
+/*
+ * List the subkeys of the specified registry key.
+ *
+ * On success, return HIVE_OK and store in @subkeys_ret a NULL-terminated array
+ * holding the name of each subkey of the key named @key_name; the array is to
+ * be released with hive_free_subkeys_list().  A key without subkeys yields an
+ * array containing only the terminator.
+ *
+ * On failure, return another HIVE_* error code; everything allocated here is
+ * freed and *subkeys_ret is left untouched, so the caller never sees a pointer
+ * to freed memory.
+ *
+ * NOTE(review): names stored one byte per character are copied byte for byte,
+ * which is only a correct conversion for ASCII when tchar is a UTF-8 char -
+ * confirm whether non-ASCII key names matter to callers.
+ */
+enum hive_status
+hive_list_subkeys(const struct regf *regf, const tchar *key_name,
+                 tchar ***subkeys_ret)
+{
+       enum hive_status status;
+       const struct nk *nk;
+       size_t num_subkeys;
+       const struct cell *subkey_list;
+       const struct lf *lf;
+       tchar **subkeys;
+
+       /* Look up the nk cell for the key.  */
+       status = lookup_key(regf, key_name, &nk);
+       if (status != HIVE_OK)
+               return status;
+
+       num_subkeys = le32_to_cpu(nk->num_subkeys);
+
+       if (num_subkeys > 65536) /* Arbitrary limit */
+               return HIVE_CORRUPT;
+
+       /* Prepare the array of subkey names to return.  It is zeroed, so it is
+        * NULL-terminated no matter how many names have been filled in.  */
+       subkeys = CALLOC(num_subkeys + 1, sizeof(subkeys[0]));
+       if (!subkeys)
+               return HIVE_OUT_OF_MEMORY;
+
+       /* No subkeys?  */
+       if (num_subkeys == 0) {
+               *subkeys_ret = subkeys;
+               return HIVE_OK;
+       }
+
+       /* Find the subkey list cell.  */
+       status = HIVE_CORRUPT;
+       subkey_list = get_cell_pointer(regf, nk->subkey_list_offset,
+                                      sizeof(struct cell));
+       if (!subkey_list)
+               goto err;
+
+       /* Only LF and LH subkey lists are handled.  */
+       if (subkey_list->magic != LF_MAGIC && subkey_list->magic != LH_MAGIC) {
+               status = HIVE_UNSUPPORTED;
+               goto err;
+       }
+
+       /* Validate the list again, now with room for every entry.  */
+       lf = get_cell_pointer(regf, nk->subkey_list_offset,
+                             sizeof(struct lf) +
+                             (num_subkeys * sizeof(lf->subkeys[0])));
+       if (!lf)
+               goto err;
+
+       /* Iterate through the subkey list and gather the subkey names.  */
+       for (size_t i = 0; i < num_subkeys; i++) {
+               const struct nk *sub_nk;
+               size_t name_size;
+               tchar *subkey;
+               size_t dummy;
+
+               status = HIVE_CORRUPT;
+               sub_nk = get_cell_pointer(regf, lf->subkeys[i].offset,
+                                         sizeof(struct nk));
+               if (!sub_nk)
+                       goto err;
+
+               /* A list entry which does not point to an nk cell must not be
+                * interpreted as a key.  */
+               if (sub_nk->base.magic != NK_MAGIC)
+                       goto err;
+
+               name_size = le16_to_cpu(sub_nk->name_size);
+
+               if (!revalidate_cell(regf, lf->subkeys[i].offset,
+                                    sizeof(struct nk) + name_size))
+                       goto err;
+
+               if (sub_nk->flags & NK_COMPRESSED_NAME) {
+                       status = HIVE_OUT_OF_MEMORY;
+                       subkey = MALLOC((name_size + 1) * sizeof(tchar));
+                       if (!subkey)
+                               goto err;
+                       /* Widen each byte as unsigned, so that bytes >= 0x80
+                        * are not sign-extended when tchar is wider than
+                        * char.  */
+                       for (size_t j = 0; j < name_size; j++)
+                               subkey[j] = (tchar)(u8)sub_nk->name[j];
+                       subkey[name_size] = '\0';
+               } else {
+                       status = translate_wimlib_error(
+                               utf16le_to_tstr((utf16lechar *)sub_nk->name,
+                                               name_size, &subkey, &dummy));
+                       if (status != HIVE_OK)
+                               goto err;
+               }
+               subkeys[i] = subkey;
+       }
+
+       /* Hand the list over only once it is complete.  */
+       *subkeys_ret = subkeys;
+       return HIVE_OK;
+
+err:
+       hive_free_subkeys_list(subkeys);
+       return status;
+}
+
+/* Free a NULL-terminated list of subkey names, such as the one returned by
+ * hive_list_subkeys(): each name is freed, then the array itself.  Passing
+ * NULL is allowed and does nothing.  */
+void
+hive_free_subkeys_list(tchar **subkeys)
+{
+       if (!subkeys)
+               return;
+
+       for (tchar **p = subkeys; *p; p++)
+               FREE(*p);
+       FREE(subkeys);
+}
+
+/* Return the name of the hive status code @status as a string literal, or NULL
+ * if @status is none of the codes handled below.  */
+const char *
+hive_status_to_string(enum hive_status status)
+{
+       const char *name = NULL;
+
+       switch (status) {
+       case HIVE_OK:
+               name = "HIVE_OK";
+               break;
+       case HIVE_CORRUPT:
+               name = "HIVE_CORRUPT";
+               break;
+       case HIVE_UNSUPPORTED:
+               name = "HIVE_UNSUPPORTED";
+               break;
+       case HIVE_KEY_NOT_FOUND:
+               name = "HIVE_KEY_NOT_FOUND";
+               break;
+       case HIVE_VALUE_NOT_FOUND:
+               name = "HIVE_VALUE_NOT_FOUND";
+               break;
+       case HIVE_VALUE_IS_WRONG_TYPE:
+               name = "HIVE_VALUE_IS_WRONG_TYPE";
+               break;
+       case HIVE_OUT_OF_MEMORY:
+               name = "HIVE_OUT_OF_MEMORY";
+               break;
+       }
+
+       return name;
+}