X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=8a285e25a0b6f4590a4efac5d3e622eced0e8b42;hp=3f258b76d9151e8e1e6101d155a900c6fb78e05c;hb=72f16c2c9e53f83210e8679f487d4a64d8c897e7;hpb=075e1470621233971f5785ce12e5106f3c0f4d42 diff --git a/src/dentry.c b/src/dentry.c index 3f258b76..8a285e25 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -35,12 +35,12 @@ #include #include -#include "wimlib_internal.h" #include "dentry.h" #include "io.h" -#include "timestamp.h" #include "lookup_table.h" #include "sha1.h" +#include "timestamp.h" +#include "wimlib_internal.h" /* * Returns true if @dentry has the UTF-8 file name @name that has length @@ -54,14 +54,48 @@ static bool dentry_has_name(const struct dentry *dentry, const char *name, return memcmp(dentry->file_name_utf8, name, name_len) == 0; } +static u64 __dentry_correct_length_unaligned(u16 file_name_len, + u16 short_name_len) +{ + u64 length = WIM_DENTRY_DISK_SIZE; + if (file_name_len) + length += file_name_len + 2; + if (short_name_len) + length += short_name_len + 2; + return length; +} + +static u64 dentry_correct_length_unaligned(const struct dentry *dentry) +{ + return __dentry_correct_length_unaligned(dentry->file_name_len, + dentry->short_name_len); +} + +/* Return the "correct" value to write in the length field of the dentry, based + * on the file name length and short name length */ +static u64 dentry_correct_length(const struct dentry *dentry) +{ + return (dentry_correct_length_unaligned(dentry) + 7) & ~7; +} + +static u64 __dentry_total_length(const struct dentry *dentry, u64 length) +{ + for (u16 i = 0; i < dentry->num_ads; i++) + length += ads_entry_total_length(&dentry->ads_entries[i]); + return (length + 7) & ~7; +} + +u64 dentry_correct_total_length(const struct dentry *dentry) +{ + return __dentry_total_length(dentry, + dentry_correct_length_unaligned(dentry)); +} + /* Real length of a dentry, including the alternate data stream entries, which * are not included in the 
dentry->length field... */ u64 dentry_total_length(const struct dentry *dentry) { - u64 length = (dentry->length + 7) & ~7; - for (u16 i = 0; i < dentry->num_ads; i++) - length += ads_entry_length(&dentry->ads_entries[i]); - return length; + return __dentry_total_length(dentry, dentry->length); } /* Transfers file attributes from a `stat' buffer to a struct dentry. */ @@ -87,7 +121,7 @@ void stbuf_to_dentry(const struct stat *stbuf, struct dentry *dentry) } -/* Makes all timestamp fields for the dentry be the current time. */ +/* Sets all the timestamp fields of the dentry to the current time. */ void dentry_update_all_timestamps(struct dentry *dentry) { u64 now = get_wim_timestamp(); @@ -96,6 +130,8 @@ void dentry_update_all_timestamps(struct dentry *dentry) dentry->last_write_time = now; } +/* Returns the alternate data stream entry belonging to @dentry that has the + * stream name @stream_name. */ struct ads_entry *dentry_get_ads_entry(struct dentry *dentry, const char *stream_name) { @@ -121,8 +157,10 @@ static void ads_entry_init(struct ads_entry *ads_entry) ads_entry->lte_group_list.type = STREAM_TYPE_ADS; } -/* Add an alternate stream entry to a dentry and return a pointer to it, or NULL - * on failure. */ +/* + * Add an alternate stream entry to a dentry and return a pointer to it, or NULL + * if memory could not be allocated. + */ struct ads_entry *dentry_add_ads(struct dentry *dentry, const char *stream_name) { u16 num_ads; @@ -157,6 +195,14 @@ struct ads_entry *dentry_add_ads(struct dentry *dentry, const char *stream_name) return new_entry; } +/* Remove an alternate data stream from a dentry. + * + * The corresponding lookup table entry for the stream is NOT changed. + * + * @dentry: The dentry + * @ads_entry: The alternate data stream entry (it MUST be one of the + * ads_entry's in the array dentry->ads_entries). 
+ */ void dentry_remove_ads(struct dentry *dentry, struct ads_entry *ads_entry) { u16 idx; @@ -296,12 +342,12 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) child = dentry->children; dentry->subdir_offset = *subdir_offset_p; - if (child) { + if (child) { /* Advance the subdir offset by the amount of space the children * of this dentry take up. */ do { - *subdir_offset_p += dentry_total_length(child); + *subdir_offset_p += dentry_correct_total_length(child); child = child->next; } while (child != dentry->children); @@ -317,7 +363,7 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) } else { /* On disk, childless directories have a valid subdir_offset * that points to an 8-byte end-of-directory dentry. Regular - * files have a subdir_offset of 0. */ + * files or reparse points have a subdir_offset of 0. */ if (dentry_is_directory(dentry)) *subdir_offset_p += 8; else @@ -329,7 +375,7 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) /* Returns the child of @dentry that has the file name @name. * Returns NULL if no child has the name. */ struct dentry *get_dentry_child_with_name(const struct dentry *dentry, - const char *name) + const char *name) { struct dentry *child; size_t name_len; @@ -348,7 +394,8 @@ struct dentry *get_dentry_child_with_name(const struct dentry *dentry, /* Retrieves the dentry that has the UTF-8 @path relative to the dentry * @cur_dir. Returns NULL if no dentry having the path is found. */ -static struct dentry *get_dentry_relative_path(struct dentry *cur_dir, const char *path) +static struct dentry *get_dentry_relative_path(struct dentry *cur_dir, + const char *path) { struct dentry *child; size_t base_len; @@ -379,7 +426,8 @@ struct dentry *get_dentry(WIMStruct *w, const char *path) return get_dentry_relative_path(root, path); } -/* Returns the parent directory for the @path. 
*/ +/* Returns the dentry that corresponds to the parent directory of @path, or NULL + * if the dentry is not found. */ struct dentry *get_parent_dentry(WIMStruct *w, const char *path) { size_t path_len = strlen(path); @@ -400,34 +448,39 @@ int print_dentry_full_path(struct dentry *dentry, void *ignore) return 0; } +/* We want to be able to show the names of the file attribute flags that are + * set. */ struct file_attr_flag { u32 flag; const char *name; }; struct file_attr_flag file_attr_flags[] = { - {FILE_ATTRIBUTE_READONLY, "READONLY"}, - {FILE_ATTRIBUTE_HIDDEN, "HIDDEN"}, - {FILE_ATTRIBUTE_SYSTEM, "SYSTEM"}, - {FILE_ATTRIBUTE_DIRECTORY, "DIRECTORY"}, - {FILE_ATTRIBUTE_ARCHIVE, "ARCHIVE"}, - {FILE_ATTRIBUTE_DEVICE, "DEVICE"}, - {FILE_ATTRIBUTE_NORMAL, "NORMAL"}, - {FILE_ATTRIBUTE_TEMPORARY, "TEMPORARY"}, - {FILE_ATTRIBUTE_SPARSE_FILE, "SPARSE_FILE"}, - {FILE_ATTRIBUTE_REPARSE_POINT, "REPARSE_POINT"}, - {FILE_ATTRIBUTE_COMPRESSED, "COMPRESSED"}, - {FILE_ATTRIBUTE_OFFLINE, "OFFLINE"}, + {FILE_ATTRIBUTE_READONLY, "READONLY"}, + {FILE_ATTRIBUTE_HIDDEN, "HIDDEN"}, + {FILE_ATTRIBUTE_SYSTEM, "SYSTEM"}, + {FILE_ATTRIBUTE_DIRECTORY, "DIRECTORY"}, + {FILE_ATTRIBUTE_ARCHIVE, "ARCHIVE"}, + {FILE_ATTRIBUTE_DEVICE, "DEVICE"}, + {FILE_ATTRIBUTE_NORMAL, "NORMAL"}, + {FILE_ATTRIBUTE_TEMPORARY, "TEMPORARY"}, + {FILE_ATTRIBUTE_SPARSE_FILE, "SPARSE_FILE"}, + {FILE_ATTRIBUTE_REPARSE_POINT, "REPARSE_POINT"}, + {FILE_ATTRIBUTE_COMPRESSED, "COMPRESSED"}, + {FILE_ATTRIBUTE_OFFLINE, "OFFLINE"}, {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,"NOT_CONTENT_INDEXED"}, - {FILE_ATTRIBUTE_ENCRYPTED, "ENCRYPTED"}, - {FILE_ATTRIBUTE_VIRTUAL, "VIRTUAL"}, + {FILE_ATTRIBUTE_ENCRYPTED, "ENCRYPTED"}, + {FILE_ATTRIBUTE_VIRTUAL, "VIRTUAL"}, }; -/* Prints a directory entry. @lookup_table is a pointer to the lookup table, or - * NULL if the resource entry for the dentry is not to be printed. */ +/* Prints a directory entry. @lookup_table is a pointer to the lookup table, if + * available. 
If the dentry is unresolved and the lookup table is NULL, the + * lookup table entries will not be printed. Otherwise, they will be. */ int print_dentry(struct dentry *dentry, void *lookup_table) { const u8 *hash; struct lookup_table_entry *lte; + time_t time; + char *p; printf("[DENTRY]\n"); printf("Length = %"PRIu64"\n", dentry->length); @@ -448,12 +501,25 @@ int print_dentry(struct dentry *dentry, void *lookup_table) printf("Last Write Time = 0x%"PRIx64"\n"); #endif + /* Translate the timestamps into something readable */ time_t creat_time = wim_timestamp_to_unix(dentry->creation_time); time_t access_time = wim_timestamp_to_unix(dentry->last_access_time); time_t mod_time = wim_timestamp_to_unix(dentry->last_write_time); - printf("Creation Time = %s", asctime(localtime(&creat_time))); - printf("Last Access Time = %s", asctime(localtime(&access_time))); - printf("Last Write Time = %s", asctime(localtime(&mod_time))); + + time = wim_timestamp_to_unix(dentry->creation_time); + p = asctime(gmtime(&time)); + *(strrchr(p, '\n')) = '\0'; + printf("Creation Time = %s UTC\n", p); + + time = wim_timestamp_to_unix(dentry->last_access_time); + p = asctime(gmtime(&time)); + *(strrchr(p, '\n')) = '\0'; + printf("Last Access Time = %s UTC\n", p); + + time = wim_timestamp_to_unix(dentry->last_write_time); + p = asctime(gmtime(&time)); + *(strrchr(p, '\n')) = '\0'; + printf("Last Write Time = %s UTC\n", p); printf("Reparse Tag = 0x%"PRIx32"\n", dentry->reparse_tag); printf("Hard Link Group = 0x%"PRIx64"\n", dentry->hard_link); @@ -498,7 +564,8 @@ int print_dentry(struct dentry *dentry, void *lookup_table) return 0; } -static inline void dentry_common_init(struct dentry *dentry) +/* Initializations done on every `struct dentry'. */ +static void dentry_common_init(struct dentry *dentry) { memset(dentry, 0, sizeof(struct dentry)); dentry->refcnt = 1; @@ -510,8 +577,9 @@ static inline void dentry_common_init(struct dentry *dentry) /* * Creates an unlinked directory entry. 
* - @name: The base name of the new dentry. - * @return: A pointer to the new dentry, or NULL if out of memory. + * @name: The UTF-8 filename of the new dentry. + * + * Returns a pointer to the new dentry, or NULL if out of memory. */ struct dentry *new_dentry(const char *name) { @@ -555,10 +623,13 @@ static void __destroy_dentry(struct dentry *dentry) FREE(dentry->extracted_file); } +/* Frees a WIM dentry. */ void free_dentry(struct dentry *dentry) { wimlib_assert(dentry); __destroy_dentry(dentry); + /* Don't destroy the ADS entries if they "belong" to a different dentry + * */ if (dentry->ads_entries_status != ADS_ENTRIES_USER) dentry_free_ads_entries(dentry); FREE(dentry); @@ -586,12 +657,13 @@ void put_dentry(struct dentry *dentry) } -/* clones a dentry. +/* Partially clones a dentry. * * Beware: - * - memory for file names is not cloned + * - memory for file names is not cloned (the pointers are all set to NULL + * and the lengths are set to zero) * - next, prev, and children pointers and not touched - * - stream entries are not cloned. + * - stream entries are not cloned (pointer left untouched). */ struct dentry *clone_dentry(struct dentry *old) { @@ -615,13 +687,14 @@ struct dentry *clone_dentry(struct dentry *old) static int do_free_dentry(struct dentry *dentry, void *__lookup_table) { struct lookup_table *lookup_table = __lookup_table; + struct lookup_table_entry *lte; + unsigned i; + if (lookup_table) { - struct lookup_table_entry *lte; - if (dentry->resolved) - lte = dentry->lte; - else - lte = __lookup_resource(lookup_table, dentry->hash); - lte_decrement_refcnt(lte, lookup_table); + for (i = 0; i <= dentry->num_ads; i++) { + lte = dentry_stream_lte(dentry, i, lookup_table); + lte_decrement_refcnt(lte, lookup_table); + } } wimlib_assert(dentry->refcnt != 0); @@ -697,17 +770,6 @@ void unlink_dentry(struct dentry *dentry) } } - -/* Recalculates the length of @dentry based on its file name length and short - * name length. 
*/ -static inline void recalculate_dentry_size(struct dentry *dentry) -{ - dentry->length = WIM_DENTRY_DISK_SIZE + dentry->file_name_len + - 2 + dentry->short_name_len; - /* Must be multiple of 8. */ - dentry->length = (dentry->length + 7) & ~7; -} - /* Duplicates a UTF-8 name into UTF-8 and UTF-16 strings and returns the strings * and their lengths in the pointer arguments */ int get_names(char **name_utf16_ret, char **name_utf8_ret, @@ -751,10 +813,12 @@ int change_dentry_name(struct dentry *dentry, const char *new_name) &dentry->file_name_len, &dentry->file_name_utf8_len, new_name); if (ret == 0) - recalculate_dentry_size(dentry); + dentry->length = dentry_correct_length(dentry); return ret; } +/* + * Changes the name of an alternate data stream */ int change_ads_name(struct ads_entry *entry, const char *new_name) { return get_names(&entry->stream_name, &entry->stream_name_utf8, @@ -776,7 +840,6 @@ static int calculate_dentry_statistics(struct dentry *dentry, void *arg) { struct image_statistics *stats; struct lookup_table_entry *lte; - u16 i; stats = arg; @@ -785,28 +848,18 @@ static int calculate_dentry_statistics(struct dentry *dentry, void *arg) else ++*stats->file_count; - if (dentry->resolved) - lte = dentry->lte; - else - lte = __lookup_resource(stats->lookup_table, dentry->hash); - i = 0; - while (1) { + for (unsigned i = 0; i <= dentry->num_ads; i++) { + lte = dentry_stream_lte(dentry, i, stats->lookup_table); if (lte) { - u64 size = lte->resource_entry.original_size; - *stats->total_bytes += size; + *stats->total_bytes += wim_resource_size(lte); if (++lte->out_refcnt == 1) - *stats->hard_link_bytes += size; + *stats->hard_link_bytes += wim_resource_size(lte); } - if (i == dentry->num_ads) - break; - lte = __lookup_resource(stats->lookup_table, - dentry->ads_entries[i].hash); - i++; } - return 0; } +/* Calculates some statistics about a dentry tree. 
*/ void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *table, u64 *dir_count_ret, u64 *file_count_ret, u64 *total_bytes_ret, @@ -826,25 +879,70 @@ void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *tab for_dentry_in_tree(root, calculate_dentry_statistics, &stats); } + +/* + * Reads the alternate data stream entries for a dentry. + * + * @p: Pointer to buffer that starts with the first alternate stream entry. + * + * @dentry: Dentry to load the alternate data streams into. + * @dentry->num_ads must have been set to the number of + * alternate data streams that are expected. + * + * @remaining_size: Number of bytes of data remaining in the buffer pointed + * to by @p. + * + * The format of the on-disk alternate stream entries is as follows: + * + * struct ads_entry_on_disk { + * u64 length; // Length of the entry, in bytes. This includes + * all fields (including the stream name and + * null terminator if present, AND the padding!). + * u64 reserved; // Seems to be unused + * u8 hash[20]; // SHA1 message digest of the uncompressed stream + * u16 stream_name_len; // Length of the stream name, in bytes + * char stream_name[]; // Stream name in UTF-16LE, @stream_name_len bytes long, + * not including null terminator + * u16 zero; // UTF-16 null terminator for the stream name, NOT + * included in @stream_name_len. Based on what + * I've observed from filenames in dentries, + * this field should not exist when + * (@stream_name_len == 0), but you can't + * actually tell because of the padding anyway + * (provided that the padding is zeroed, which + * it always seems to be). + * char padding[]; // Padding to make the size a multiple of 8 bytes. + * }; + * + * In addition, the entries are 8-byte aligned. + * + * Return 0 on success or nonzero on failure. On success, dentry->ads_entries + * is set to an array of `struct ads_entry's of length dentry->num_ads. On + * failure, @dentry is not modified. 
+ */ static int read_ads_entries(const u8 *p, struct dentry *dentry, u64 remaining_size) { - u16 num_ads = dentry->num_ads; - struct ads_entry *ads_entries = CALLOC(num_ads, sizeof(struct ads_entry)); + u16 num_ads; + struct ads_entry *ads_entries; int ret; + + num_ads = dentry->num_ads; + ads_entries = CALLOC(num_ads, sizeof(struct ads_entry)); if (!ads_entries) { ERROR("Could not allocate memory for %"PRIu16" " "alternate data stream entries", num_ads); return WIMLIB_ERR_NOMEM; } - DEBUG2("Reading %"PRIu16" alternate data streams " - "(remaining size = %"PRIu64")", num_ads, remaining_size); for (u16 i = 0; i < num_ads; i++) { struct ads_entry *cur_entry = &ads_entries[i]; u64 length; + u64 length_no_padding; + u64 total_length; size_t utf8_len; const char *p_save = p; + /* Read the base stream entry, excluding the stream name. */ if (remaining_size < WIM_ADS_ENTRY_DISK_SIZE) { ERROR("Stream entries go past end of metadata resource"); @@ -852,48 +950,81 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry, ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - remaining_size -= WIM_ADS_ENTRY_DISK_SIZE; - p = get_u64(p, &length); /* ADS entry length */ - - DEBUG2("ADS length = %"PRIu64, length); - - p += 8; /* Unused */ + p = get_u64(p, &length); + p += 8; /* Skip the reserved field */ p = get_bytes(p, SHA1_HASH_SIZE, (u8*)cur_entry->hash); p = get_u16(p, &cur_entry->stream_name_len); - DEBUG2("Stream name length = %u", cur_entry->stream_name_len); - cur_entry->stream_name = NULL; cur_entry->stream_name_utf8 = NULL; - if (remaining_size < cur_entry->stream_name_len + 2) { + /* Length including neither the null terminator nor the padding + * */ + length_no_padding = WIM_ADS_ENTRY_DISK_SIZE + + cur_entry->stream_name_len; + + /* Length including the null terminator and the padding */ + total_length = ((length_no_padding + 2) + 7) & ~7; + + wimlib_assert(total_length == ads_entry_total_length(cur_entry)); + + if (remaining_size < 
length_no_padding) { ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64" bytes, stream_name_len " - "= %"PRIu16" bytes", remaining_size, - cur_entry->stream_name_len); + ERROR("(remaining_size = %"PRIu64" bytes, " + "length_no_padding = %"PRIu16" bytes)", + remaining_size, length_no_padding); ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - remaining_size -= cur_entry->stream_name_len + 2; - cur_entry->stream_name = MALLOC(cur_entry->stream_name_len); - if (!cur_entry->stream_name) { - ret = WIMLIB_ERR_NOMEM; + /* The @length field in the on-disk ADS entry is expected to be + * equal to @total_length, which includes all of the entry and + * the padding that follows it to align the next ADS entry to an + * 8-byte boundary. However, to be safe, we'll accept the + * length field as long as it's not less than the un-padded + * total length and not more than the padded total length. */ + if (length < length_no_padding || length > total_length) { + ERROR("Stream entry has unexpected length " + "field (length field = %"PRIu64", " + "unpadded total length = %"PRIu64", " + "padded total length = %"PRIu64")", + length, length_no_padding, total_length); + ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - get_bytes(p, cur_entry->stream_name_len, - (u8*)cur_entry->stream_name); - cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, - cur_entry->stream_name_len, - &utf8_len); - cur_entry->stream_name_utf8_len = utf8_len; - - if (!cur_entry->stream_name_utf8) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; + + if (cur_entry->stream_name_len) { + cur_entry->stream_name = MALLOC(cur_entry->stream_name_len); + if (!cur_entry->stream_name) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_ads_entries; + } + get_bytes(p, cur_entry->stream_name_len, + (u8*)cur_entry->stream_name); + cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, + cur_entry->stream_name_len, + &utf8_len); + 
cur_entry->stream_name_utf8_len = utf8_len; + + if (!cur_entry->stream_name_utf8) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_ads_entries; + } } - p = p_save + ads_entry_length(cur_entry); + /* It's expected that the size of every ADS entry is a multiple + * of 8. However, to be safe, I'm allowing the possibility of + * an ADS entry at the very end of the metadata resource ending + * un-aligned. So although we still need to increment the input + * pointer by @total_length to reach the next ADS entry, it's + * possible that less than @total_length is actually remaining + * in the metadata resource. We should set the remaining size to + * 0 bytes if this happens. */ + p = p_save + total_length; + if (remaining_size < total_length) + remaining_size = 0; + else + remaining_size -= total_length; } dentry->ads_entries = ads_entries; return 0; @@ -907,16 +1038,28 @@ out_free_ads_entries: } /* - * Reads a directory entry from the metadata resource. + * Reads a directory entry, including all alternate data stream entries that + * follow it, from the WIM image's metadata resource. + * + * @metadata_resource: Buffer containing the uncompressed metadata resource. + * @metadata_resource_len: Length of the metadata resource. + * @offset: Offset of this directory entry in the metadata resource. + * @dentry: A `struct dentry' that will be filled in by this function. + * + * Return 0 on success or nonzero on failure. On failure, @dentry will have been + * modified, but it will be left with no pointers to any allocated buffers. + * On success, the dentry->length field must be examined. If zero, this was a + * special "end of directory" dentry and not a real dentry. If nonzero, this + * was a real dentry. 
*/ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, u64 offset, struct dentry *dentry) { const u8 *p; u64 calculated_size; - char *file_name; - char *file_name_utf8; - char *short_name; + char *file_name = NULL; + char *file_name_utf8 = NULL; + char *short_name = NULL; u16 short_name_len; u16 file_name_len; size_t file_name_utf8_len; @@ -925,15 +1068,15 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, dentry_common_init(dentry); /*Make sure the dentry really fits into the metadata resource.*/ - if (offset + 8 > metadata_resource_len) { + if (offset + 8 > metadata_resource_len || offset + 8 < offset) { ERROR("Directory entry starting at %"PRIu64" ends past the " "end of the metadata resource (size %"PRIu64")", offset, metadata_resource_len); return WIMLIB_ERR_INVALID_DENTRY; } - /* Before reading the whole entry, we need to read just the length. - * This is because an entry of length 8 (that is, just the length field) + /* Before reading the whole dentry, we need to read just the length. + * This is because a dentry of length 8 (that is, just the length field) * terminates the list of sibling directory entries. */ p = get_u64(&metadata_resource[offset], &dentry->length); @@ -941,11 +1084,17 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, /* A zero length field (really a length of 8, since that's how big the * directory entry is...) indicates that this is the end of directory * dentry. We do not read it into memory as an actual dentry, so just - * return true in that case. */ + * return successfully in that case. */ if (dentry->length == 0) return 0; - if (offset + dentry->length >= metadata_resource_len) { + /* If the dentry does not overflow the metadata resource buffer and is + * not too short, read the rest of it (excluding the alternate data + * streams, but including the file name and short name variable-length + * fields) into memory. 
*/ + if (offset + dentry->length >= metadata_resource_len + || offset + dentry->length < offset) + { ERROR("Directory entry at offset %"PRIu64" and with size " "%"PRIu64" ends past the end of the metadata resource " "(size %"PRIu64")", @@ -953,9 +1102,6 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, return WIMLIB_ERR_INVALID_DENTRY; } - /* If it is a recognized length, read the rest of the directory entry. - * Note: The root directory entry has no name, and its length does not - * include the short name length field. */ if (dentry->length < WIM_DENTRY_DISK_SIZE) { ERROR("Directory entry has invalid length of %"PRIu64" bytes", dentry->length); @@ -995,91 +1141,117 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, /* By the way, the reparse_reserved field does not actually exist (at * least when the file is not a reparse point) */ - p = get_u16(p, &dentry->num_ads); p = get_u16(p, &short_name_len); p = get_u16(p, &file_name_len); - calculated_size = WIM_DENTRY_DISK_SIZE + file_name_len + 2 + - short_name_len; + /* We now know the length of the file name and short name. Make sure + * the length of the dentry is large enough to actually hold them. + * + * The calculated length here is unaligned to allow for the possibility + * that the dentry->length names an unaligned length, although this + * would be unexpected. */ + calculated_size = __dentry_correct_length_unaligned(file_name_len, + short_name_len); if (dentry->length < calculated_size) { ERROR("Unexpected end of directory entry! (Expected " - "%"PRIu64" bytes, got %"PRIu64" bytes. " + "at least %"PRIu64" bytes, got %"PRIu64" bytes. " "short_name_len = %hu, file_name_len = %hu)", calculated_size, dentry->length, short_name_len, file_name_len); return WIMLIB_ERR_INVALID_DENTRY; } - /* Read the filename. 
*/ - file_name = MALLOC(file_name_len); - if (!file_name) { - ERROR("Failed to allocate %hu bytes for dentry file name", - file_name_len); - return WIMLIB_ERR_NOMEM; - } - p = get_bytes(p, file_name_len, file_name); + /* Read the filename if present. Note: if the filename is empty, there + * is no null terminator following it. */ + if (file_name_len) { + file_name = MALLOC(file_name_len); + if (!file_name) { + ERROR("Failed to allocate %hu bytes for dentry file name", + file_name_len); + return WIMLIB_ERR_NOMEM; + } + p = get_bytes(p, file_name_len, file_name); - /* Convert filename to UTF-8. */ - file_name_utf8 = utf16_to_utf8(file_name, file_name_len, - &file_name_utf8_len); + /* Convert filename to UTF-8. */ + file_name_utf8 = utf16_to_utf8(file_name, file_name_len, + &file_name_utf8_len); - if (!file_name_utf8) { - ERROR("Failed to allocate memory to convert UTF-16 " - "filename (%hu bytes) to UTF-8", file_name_len); - ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name; + if (!file_name_utf8) { + ERROR("Failed to allocate memory to convert UTF-16 " + "filename (%hu bytes) to UTF-8", file_name_len); + ret = WIMLIB_ERR_NOMEM; + goto out_free_file_name; + } + if (*(u16*)p) + WARNING("Expected two zero bytes following the file name " + "`%s', but found non-zero bytes", file_name_utf8); + p += 2; } - /* Undocumented padding between file name and short name. This probably - * is supposed to be a terminating null character. */ - p += 2; - - /* Read the short filename. */ - short_name = MALLOC(short_name_len); - if (!short_name) { - ERROR("Failed to allocate %hu bytes for short filename", - short_name_len); - ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name_utf8; + /* Align the calculated size */ + calculated_size = (calculated_size + 7) & ~7; + + if (dentry->length > calculated_size) { + /* Weird; the dentry says it's longer than it should be. Note + * that the length field does NOT include the size of the + * alternate stream entries. 
*/ + + /* Strangely, some directory entries inexplicably have a little + * over 70 bytes of extra data. The exact amount of data seems + * to be 72 bytes, but it is aligned on the next 8-byte + * boundary. It does NOT seem to be alternate data stream + * entries. Here's an example of the aligned data: + * + * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f + * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a + * 00000000 00000000 00000000 00000000 + * + * Here's one interpretation of how the data is laid out. + * + * struct unknown { + * u32 field1; (always 0x00000001) + * u32 field2; (always 0x40000000) + * u8 data[48]; (???) + * u64 reserved1; (always 0) + * u64 reserved2; (always 0) + * };*/ + WARNING("Dentry for file or directory `%s' has %zu extra " + "bytes of data", + file_name_utf8, dentry->length - calculated_size); } - p = get_bytes(p, short_name_len, short_name); + /* Read the short filename if present. Note: if there is no short + * filename, there is no null terminator following it. */ + if (short_name_len) { + short_name = MALLOC(short_name_len); + if (!short_name) { + ERROR("Failed to allocate %hu bytes for short filename", + short_name_len); + ret = WIMLIB_ERR_NOMEM; + goto out_free_file_name_utf8; + } - /* Some directory entries inexplicibly have a little over 70 bytes of - * extra data. The exact amount of data seems to be 72 bytes, but it is - * aligned on the next 8-byte boundary. Here's an example of the - * aligned data: - * - * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f - * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a - * 00000000 00000000 00000000 00000000 - * - * Here's one interpretation of how the data is laid out. - * - * struct unknown { - * u32 field1; (always 0x00000001) - * u32 field2; (always 0x40000000) - * u8 data[48]; (???) 
- * u64 reserved1; (always 0) - * u64 reserved2; (always 0) - * };*/ -#if 0 - if (dentry->length - calculated_size >= WIM_ADS_ENTRY_DISK_SIZE) { - printf("%s: %lu / %lu (", file_name_utf8, - calculated_size, dentry->length); - print_string(p + WIM_ADS_ENTRY_DISK_SIZE, dentry->length - calculated_size - WIM_ADS_ENTRY_DISK_SIZE); - puts(")"); - print_byte_field(p, dentry->length - calculated_size); - putchar('\n'); + p = get_bytes(p, short_name_len, short_name); + if (*(u16*)p) + WARNING("Expected two zero bytes following the file name " + "`%s', but found non-zero bytes", file_name_utf8); + p += 2; } -#endif + /* + * Read the alternate data streams, if present. dentry->num_ads tells + * us how many there are, and they will directly follow the dentry + * on-disk. + * + * Note that each alternate data stream entry begins on an 8-byte + * aligned boundary, and the alternate data stream entries are NOT + * included in the dentry->length field for some reason. + */ if (dentry->num_ads != 0) { - calculated_size = (calculated_size + 7) & ~7; if (calculated_size > metadata_resource_len - offset) { ERROR("Not enough space in metadata resource for " "alternate stream entries"); @@ -1093,6 +1265,8 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, goto out_free_short_name; } + /* We've read all the data for this dentry. Set the names and their + * lengths, and we're done. 
*/ dentry->file_name = file_name; dentry->file_name_utf8 = file_name_utf8; dentry->short_name = short_name; @@ -1109,8 +1283,78 @@ out_free_file_name: return ret; } +/* Run some miscellaneous verifications on a WIM dentry */ +int verify_dentry(struct dentry *dentry, void *wim) +{ + const WIMStruct *w = wim; + const struct lookup_table *table = w->lookup_table; + const struct wim_security_data *sd = wim_const_security_data(w); + int ret = WIMLIB_ERR_INVALID_DENTRY; + + /* Check the security ID */ + if (dentry->security_id < -1) { + ERROR("Dentry `%s' has an invalid security ID (%d)", + dentry->full_path_utf8, dentry->security_id); + goto out; + } + if (dentry->security_id >= sd->num_entries) { + ERROR("Dentry `%s' has an invalid security ID (%d) " + "(there are only %u entries in the security table)", + dentry->full_path_utf8, dentry->security_id, + sd->num_entries); + goto out; + } + + /* Check that lookup table entries for all the resources exist, except + * if the SHA1 message digest is all 0's, which indicates there is + * intentionally no resource there. */ + if (w->hdr.total_parts == 1) { + for (unsigned i = 0; i <= dentry->num_ads; i++) { + struct lookup_table_entry *lte; + const u8 *hash; + hash = dentry_stream_hash_unresolved(dentry, i); + lte = __lookup_resource(table, hash); + if (!lte && !is_zero_hash(hash)) { + ERROR("Could not find lookup table entry for stream " + "%u of dentry `%s'", i, dentry->full_path_utf8); + goto out; + } + } + } + + /* Make sure there is only one un-named stream. 
*/ + unsigned num_unnamed_streams = 0; + unsigned unnamed_stream_idx; + for (unsigned i = 0; i <= dentry->num_ads; i++) { + const u8 *hash; + hash = dentry_stream_hash_unresolved(dentry, i); + if (!dentry_stream_name_len(dentry, i) && !is_zero_hash(hash)) { + num_unnamed_streams++; + unnamed_stream_idx = i; + } + } + if (num_unnamed_streams > 1) { + ERROR("Dentry `%s' has multiple (%u) un-named streams", + dentry->full_path_utf8, num_unnamed_streams); + goto out; + } + +#if 0 + /* Check timestamps */ + if (dentry->last_access_time < dentry->creation_time || + dentry->last_write_time < dentry->creation_time) { + WARNING("Dentry `%s' was created after it was last accessed or " + "written to", dentry->full_path_utf8); + } +#endif + + ret = 0; +out: + return ret; +} + /* - * Writes a dentry to an output buffer. + * Writes a WIM dentry to an output buffer. * * @dentry: The dentry structure. * @p: The memory location to write the data to. @@ -1123,7 +1367,13 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) unsigned padding; const u8 *hash; - p = put_u64(p, dentry->length); + /* We calculate the correct length of the dentry ourselves because the + * dentry->length field may have been set to an unexpected value from when we + * read the dentry in (for example, there may have been unknown data + * appended to the end of the dentry...) 
*/ + u64 length = dentry_correct_length(dentry); + + p = put_u64(p, length); p = put_u32(p, dentry->attributes); p = put_u32(p, dentry->security_id); p = put_u64(p, dentry->subdir_offset); @@ -1143,7 +1393,7 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) p = put_zeroes(p, 4); } else { u64 hard_link; - p = put_u32(p, dentry->reparse_tag); + p = put_u32(p, 0); if (dentry->link_group_list.next == &dentry->link_group_list) hard_link = 0; else @@ -1153,18 +1403,25 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) p = put_u16(p, dentry->num_ads); p = put_u16(p, dentry->short_name_len); p = put_u16(p, dentry->file_name_len); - p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); - p = put_u16(p, 0); /* filename padding, 2 bytes. */ - p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); - - wimlib_assert(p - orig_p <= dentry->length); - if (p - orig_p < dentry->length) - p = put_zeroes(p, dentry->length - (p - orig_p)); + if (dentry->file_name_len) { + p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); + p = put_u16(p, 0); /* filename padding, 2 bytes. */ + } + if (dentry->short_name) { + p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); + p = put_u16(p, 0); /* short name padding, 2 bytes */ + } - p = put_zeroes(p, (8 - dentry->length % 8) % 8); + /* Align to 8-byte boundary */ + wimlib_assert(length >= (p - orig_p) + && length - (p - orig_p) <= 7); + p = put_zeroes(p, length - (p - orig_p)); + /* Write the alternate data streams, if there are any. Please see + * read_ads_entries() for comments about the format of the on-disk + * alternate data stream entries. 
*/ for (u16 i = 0; i < dentry->num_ads; i++) { - p = put_u64(p, ads_entry_length(&dentry->ads_entries[i])); + p = put_u64(p, ads_entry_total_length(&dentry->ads_entries[i])); p = put_u64(p, 0); /* Unused */ if (dentry->resolved && dentry->ads_entries[i].lte) hash = dentry->ads_entries[i].lte->hash; @@ -1172,39 +1429,41 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) hash = dentry->ads_entries[i].hash; p = put_bytes(p, SHA1_HASH_SIZE, hash); p = put_u16(p, dentry->ads_entries[i].stream_name_len); - p = put_bytes(p, dentry->ads_entries[i].stream_name_len, - (u8*)dentry->ads_entries[i].stream_name); - p = put_u16(p, 0); + if (dentry->ads_entries[i].stream_name_len) { + p = put_bytes(p, dentry->ads_entries[i].stream_name_len, + (u8*)dentry->ads_entries[i].stream_name); + p = put_u16(p, 0); + } p = put_zeroes(p, (8 - (p - orig_p) % 8) % 8); } +#ifdef ENABLE_ASSERTIONS + wimlib_assert(p - orig_p == __dentry_total_length(dentry, length)); +#endif return p; } -/* Recursive function that writes a dentry tree rooted at @tree, not including - * @tree itself, which has already been written, except in the case of the root - * dentry, which is written right away, along with an end-of-directory entry. */ -u8 *write_dentry_tree(const struct dentry *tree, u8 *p) +/* Recursive function that writes a dentry tree rooted at @parent, not including + * @parent itself, which has already been written. */ +static u8 *write_dentry_tree_recursive(const struct dentry *parent, u8 *p) { const struct dentry *child; - if (dentry_is_root(tree)) { - p = write_dentry(tree, p); - - /* write end of directory entry */ - p = put_u64(p, 0); - } else { - /* Nothing to do for non-directories */ - if (!dentry_is_directory(tree)) - return p; - } + /* Nothing to do if this dentry has no children. */ + if (parent->subdir_offset == 0) + return p; - /* Write child dentries and end-of-directory entry. */ - child = tree->children; + /* Write child dentries and end-of-directory entry. 
+ * + * Note: we need to write all of this dentry's children before + * recursively writing the directory trees rooted at each of the child + * dentries, since the on-disk dentries for a dentry's children are + * always located at consecutive positions in the metadata resource! */ + child = parent->children; if (child) { do { p = write_dentry(child, p); child = child->next; - } while (child != tree->children); + } while (child != parent->children); } /* write end of directory entry */ @@ -1213,20 +1472,46 @@ u8 *write_dentry_tree(const struct dentry *tree, u8 *p) /* Recurse on children. */ if (child) { do { - p = write_dentry_tree(child, p); + p = write_dentry_tree_recursive(child, p); child = child->next; - } while (child != tree->children); + } while (child != parent->children); } return p; } +/* Writes a directory tree to the metadata resource. + * + * @root: Root of the dentry tree. + * @p: Pointer to a buffer with enough space for the dentry tree. + * + * Returns pointer to the byte after the last byte we wrote. + */ +u8 *write_dentry_tree(const struct dentry *root, u8 *p) +{ + wimlib_assert(dentry_is_root(root)); + + /* If we're the root dentry, we have no parent that already + * wrote us, so we need to write ourselves. */ + p = write_dentry(root, p); + + /* Write end of directory entry after the root dentry just to be safe; + * however the root dentry obviously cannot have any siblings. */ + p = put_u64(p, 0); + + /* Recursively write the rest of the dentry tree. */ + return write_dentry_tree_recursive(root, p); +} + /* Reads the children of a dentry, and all their children, ..., etc. from the * metadata resource and into the dentry tree. * * @metadata_resource: An array that contains the uncompressed metadata * resource for the WIM file. - * @metadata_resource_len: The length of @metadata_resource. 
- * @dentry: A pointer to a struct dentry that is the root of the directory + * + * @metadata_resource_len: The length of the uncompressed metadata resource, in + * bytes. + * + * @dentry: A pointer to a `struct dentry' that is the root of the directory * tree and has already been read from the metadata resource. It * does not need to be the real root because this procedure is * called recursively. @@ -1243,8 +1528,12 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, struct dentry cur_child; int ret; - /* If @dentry is a regular file, nothing more needs to be done for this - * branch. */ + /* + * If @dentry has no child dentries, nothing more needs to be done for + * this branch. This is the case for regular files, symbolic links, and + * *possibly* empty directories (although an empty directory may also + * have one child dentry that is the special end-of-directory dentry) + */ if (cur_offset == 0) return 0; @@ -1258,10 +1547,8 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, break; /* Check for end of directory. */ - if (cur_child.length == 0) { - ret = 0; + if (cur_child.length == 0) break; - } /* Not end of directory. Allocate this child permanently and * link it to the parent and previous child. */ @@ -1293,12 +1580,16 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, break; } - /* Advance to the offset of the next child. */ + /* Advance to the offset of the next child. Note: We need to + * advance by the TOTAL length of the dentry, not by the length + * child->length, which, although it does take into account the + * padding, DOES NOT take into account alternate stream + * entries. */ cur_offset += dentry_total_length(child); } - /* Link last child to first one, and set parent's - * children pointer to the first child. */ + /* Link last child to first one, and set parent's children pointer to + * the first child.
*/ if (prev_child) { prev_child->next = first_child; first_child->prev = prev_child;