X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fdentry.c;h=6d104ee5a990d843d13c5d508cb621e95836f1bd;hb=599038a018d59ae50adca3ead59bfbb83aadef59;hp=a24e620ad98e90df88f03914e94ee5448db8c7f8;hpb=d9d4b6b3e0462d3419bb8a9964b5da38a72da02f;p=wimlib diff --git a/src/dentry.c b/src/dentry.c index a24e620a..6d104ee5 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -54,14 +54,42 @@ static bool dentry_has_name(const struct dentry *dentry, const char *name, return memcmp(dentry->file_name_utf8, name, name_len) == 0; } +static u64 __dentry_correct_length_unaligned(u16 file_name_len, + u16 short_name_len) +{ + u64 length = WIM_DENTRY_DISK_SIZE; + if (file_name_len) + length += file_name_len + 2; + if (short_name_len) + length += short_name_len + 2; + return length; +} + +static u64 dentry_correct_length_unaligned(const struct dentry *dentry) +{ + return __dentry_correct_length_unaligned(dentry->file_name_len, + dentry->short_name_len); +} + +/* Return the "correct" value to write in the length field of the dentry, based + * on the file name length and short name length */ +static u64 dentry_correct_length(const struct dentry *dentry) +{ + return (dentry_correct_length_unaligned(dentry) + 7) & ~7; +} + +static u64 __dentry_total_length(const struct dentry *dentry, u64 length) +{ + for (u16 i = 0; i < dentry->num_ads; i++) + length += ads_entry_total_length(&dentry->ads_entries[i]); + return (length + 7) & ~7; +} + /* Real length of a dentry, including the alternate data stream entries, which * are not included in the dentry->length field... */ u64 dentry_total_length(const struct dentry *dentry) { - u64 length = (dentry->length + 7) & ~7; - for (u16 i = 0; i < dentry->num_ads; i++) - length += ads_entry_total_length(&dentry->ads_entries[i]); - return length; + return __dentry_total_length(dentry, dentry->length); } /* Transfers file attributes from a `stat' buffer to a struct dentry. 
*/ @@ -313,7 +341,8 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) /* Advance the subdir offset by the amount of space the children * of this dentry take up. */ do { - *subdir_offset_p += dentry_total_length(child); + *subdir_offset_p += __dentry_total_length(child, + dentry_correct_length(child)); child = child->next; } while (child != dentry->children); @@ -722,17 +751,6 @@ void unlink_dentry(struct dentry *dentry) } } - -/* Recalculates the length of @dentry based on its file name length and short - * name length. */ -static inline void recalculate_dentry_size(struct dentry *dentry) -{ - dentry->length = WIM_DENTRY_DISK_SIZE + dentry->file_name_len + - 2 + dentry->short_name_len; - /* Must be multiple of 8. */ - dentry->length = (dentry->length + 7) & ~7; -} - /* Duplicates a UTF-8 name into UTF-8 and UTF-16 strings and returns the strings * and their lengths in the pointer arguments */ int get_names(char **name_utf16_ret, char **name_utf8_ret, @@ -776,7 +794,7 @@ int change_dentry_name(struct dentry *dentry, const char *new_name) &dentry->file_name_len, &dentry->file_name_utf8_len, new_name); if (ret == 0) - recalculate_dentry_size(dentry); + dentry->length = dentry_correct_length(dentry); return ret; } @@ -850,7 +868,7 @@ void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *tab * * @dentry: Dentry to load the alternate data streams into. * @dentry->num_ads must have been set to the number of - * data streams that are expected. + * alternate data streams that are expected. * * @remaining_size: Number of bytes of data remaining in the buffer pointed * to by @p. @@ -858,14 +876,23 @@ void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *tab * The format of the on-disk alternate stream entries is as follows: * * struct ads_entry_on_disk { - * u64 length; // Length of the entry, in bytes + * u64 length; // Length of the entry, in bytes. 
This includes + * all fields (including the stream name and + * null terminator if present, AND the padding!). * u64 reserved; // Seems to be unused * u8 hash[20]; // SHA1 message digest of the uncompressed stream * u16 stream_name_len; // Length of the stream name, in bytes * char stream_name[]; // Stream name in UTF-16LE, @stream_name_len bytes long, - * // not including null terminator + * not including null terminator * u16 zero; // UTF-16 null terminator for the stream name, NOT - * // included in @stream_name_len + * included in @stream_name_len. Based on what + * I've observed from filenames in dentries, + * this field should not exist when + * (@stream_name_len == 0), but you can't + * actually tell because of the padding anyway + * (provided that the padding is zeroed, which + * it always seems to be). + * char padding[]; // Padding to make the size a multiple of 8 bytes. * }; * * In addition, the entries are 8-byte aligned. @@ -892,6 +919,8 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry, for (u16 i = 0; i < num_ads; i++) { struct ads_entry *cur_entry = &ads_entries[i]; u64 length; + u64 length_no_padding; + u64 total_length; size_t utf8_len; const char *p_save = p; @@ -902,43 +931,81 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry, ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - remaining_size -= WIM_ADS_ENTRY_DISK_SIZE; - p = get_u64(p, &length); /* ADS entry length */ - p += 8; /* Unused */ + p = get_u64(p, &length); + p += 8; /* Skip the reserved field */ p = get_bytes(p, SHA1_HASH_SIZE, (u8*)cur_entry->hash); p = get_u16(p, &cur_entry->stream_name_len); cur_entry->stream_name = NULL; cur_entry->stream_name_utf8 = NULL; - if (remaining_size < cur_entry->stream_name_len + 2) { + /* Length including neither the null terminator nor the padding + * */ + length_no_padding = WIM_ADS_ENTRY_DISK_SIZE + + cur_entry->stream_name_len; + + /* Length including the null terminator and the padding */ + total_length 
= ((length_no_padding + 2) + 7) & ~7; + + wimlib_assert(total_length == ads_entry_total_length(cur_entry)); + + if (remaining_size < length_no_padding) { ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64" bytes, stream_name_len " - "= %"PRIu16" bytes", remaining_size, - cur_entry->stream_name_len); + ERROR("(remaining_size = %"PRIu64" bytes, " + "length_no_padding = %"PRIu64" bytes)", + remaining_size, length_no_padding); ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - remaining_size -= cur_entry->stream_name_len + 2; - cur_entry->stream_name = MALLOC(cur_entry->stream_name_len); - if (!cur_entry->stream_name) { - ret = WIMLIB_ERR_NOMEM; + /* The @length field in the on-disk ADS entry is expected to be + * equal to @total_length, which includes all of the entry and + * the padding that follows it to align the next ADS entry to an + * 8-byte boundary. However, to be safe, we'll accept the + * length field as long as it's not less than the un-padded + * total length and not more than the padded total length. 
*/ + if (length < length_no_padding || length > total_length) { + ERROR("Stream entry has unexpected length " + "field (length field = %"PRIu64", " + "unpadded total length = %"PRIu64", " + "padded total length = %"PRIu64")", + length, length_no_padding, total_length); + ret = WIMLIB_ERR_INVALID_DENTRY; goto out_free_ads_entries; } - get_bytes(p, cur_entry->stream_name_len, - (u8*)cur_entry->stream_name); - cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, - cur_entry->stream_name_len, - &utf8_len); - cur_entry->stream_name_utf8_len = utf8_len; - - if (!cur_entry->stream_name_utf8) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; + + if (cur_entry->stream_name_len) { + cur_entry->stream_name = MALLOC(cur_entry->stream_name_len); + if (!cur_entry->stream_name) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_ads_entries; + } + get_bytes(p, cur_entry->stream_name_len, + (u8*)cur_entry->stream_name); + cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, + cur_entry->stream_name_len, + &utf8_len); + cur_entry->stream_name_utf8_len = utf8_len; + + if (!cur_entry->stream_name_utf8) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_ads_entries; + } } - p = p_save + ads_entry_total_length(cur_entry); + /* It's expected that the size of every ADS entry is a multiple + * of 8. However, to be safe, I'm allowing the possibility of + * an ADS entry at the very end of the metadata resource ending + * un-aligned. So although we still need to increment the input + * pointer by @total_length to reach the next ADS entry, it's + * possible that less than @total_length is actually remaining + * in the metadata resource. We should set the remaining size to + * 0 bytes if this happens. 
*/ + p = p_save + total_length; + if (remaining_size < total_length) + remaining_size = 0; + else + remaining_size -= total_length; } dentry->ads_entries = ads_entries; return 0; @@ -971,9 +1038,9 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, { const u8 *p; u64 calculated_size; - char *file_name; - char *file_name_utf8; - char *short_name; + char *file_name = NULL; + char *file_name_utf8 = NULL; + char *short_name = NULL; u16 short_name_len; u16 file_name_len; size_t file_name_utf8_len; @@ -1059,85 +1126,100 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, p = get_u16(p, &short_name_len); p = get_u16(p, &file_name_len); - /* We now know the length of the file name and short name. These should - * be included in the dentry length, but make sure the numbers are - * consistent. */ - calculated_size = WIM_DENTRY_DISK_SIZE + file_name_len + 2 + - short_name_len; + /* We now know the length of the file name and short name. Make sure + * the length of the dentry is large enough to actually hold them. + * + * The calculated length here is unaligned to allow for the possibility + * that the dentry->length names an unaligned length, although this + * would be unexpected. */ + calculated_size = __dentry_correct_length_unaligned(file_name_len, + short_name_len); if (dentry->length < calculated_size) { ERROR("Unexpected end of directory entry! (Expected " - "%"PRIu64" bytes, got %"PRIu64" bytes. " + "at least %"PRIu64" bytes, got %"PRIu64" bytes. " "short_name_len = %hu, file_name_len = %hu)", calculated_size, dentry->length, short_name_len, file_name_len); return WIMLIB_ERR_INVALID_DENTRY; } - /* Read the filename. */ - file_name = MALLOC(file_name_len); - if (!file_name) { - ERROR("Failed to allocate %hu bytes for dentry file name", - file_name_len); - return WIMLIB_ERR_NOMEM; - } - p = get_bytes(p, file_name_len, file_name); + /* Read the filename if present. 
Note: if the filename is empty, there + * is no null terminator following it. */ + if (file_name_len) { + file_name = MALLOC(file_name_len); + if (!file_name) { + ERROR("Failed to allocate %hu bytes for dentry file name", + file_name_len); + return WIMLIB_ERR_NOMEM; + } + p = get_bytes(p, file_name_len, file_name); - /* Convert filename to UTF-8. */ - file_name_utf8 = utf16_to_utf8(file_name, file_name_len, - &file_name_utf8_len); + /* Convert filename to UTF-8. */ + file_name_utf8 = utf16_to_utf8(file_name, file_name_len, + &file_name_utf8_len); - if (!file_name_utf8) { - ERROR("Failed to allocate memory to convert UTF-16 " - "filename (%hu bytes) to UTF-8", file_name_len); - ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name; + if (!file_name_utf8) { + ERROR("Failed to allocate memory to convert UTF-16 " + "filename (%hu bytes) to UTF-8", file_name_len); + ret = WIMLIB_ERR_NOMEM; + goto out_free_file_name; + } + if (p[0] || p[1]) + WARNING("Expected two zero bytes following the file name " + "`%s', but found non-zero bytes", file_name_utf8); + p += 2; } - /* Undocumented padding between file name and short name. This probably - * is supposed to be a terminating null character. */ - p += 2; - - /* Read the short filename. */ - short_name = MALLOC(short_name_len); - if (!short_name) { - ERROR("Failed to allocate %hu bytes for short filename", - short_name_len); - ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name_utf8; + /* Align the calculated size */ + calculated_size = (calculated_size + 7) & ~7; + + if (dentry->length > calculated_size) { + /* Weird; the dentry says it's longer than it should be. Note + * that the length field does NOT include the size of the + * alternate stream entries. */ + + /* Strangely, some directory entries inexplicably have a little + * over 70 bytes of extra data. The exact amount of data seems + * to be 72 bytes, but it is aligned on the next 8-byte + * boundary. It does NOT seem to be alternate data stream + * entries. 
Here's an example of the aligned data: + * + * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f + * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a + * 00000000 00000000 00000000 00000000 + * + * Here's one interpretation of how the data is laid out. + * + * struct unknown { + * u32 field1; (always 0x00000001) + * u32 field2; (always 0x40000000) + * u8 data[48]; (???) + * u64 reserved1; (always 0) + * u64 reserved2; (always 0) + * };*/ + WARNING("Dentry for file or directory `%s' has %"PRIu64" extra " + "bytes of data", + file_name_utf8 ? file_name_utf8 : "", dentry->length - calculated_size); } - p = get_bytes(p, short_name_len, short_name); + /* Read the short filename if present. Note: if there is no short + * filename, there is no null terminator following it. */ + if (short_name_len) { + short_name = MALLOC(short_name_len); + if (!short_name) { + ERROR("Failed to allocate %hu bytes for short filename", + short_name_len); + ret = WIMLIB_ERR_NOMEM; + goto out_free_file_name_utf8; + } - /* Some directory entries inexplicably have a little over 70 bytes of - * extra data. The exact amount of data seems to be 72 bytes, but it is - * aligned on the next 8-byte boundary. It does NOT seem to be - * alternate data stream entries. Here's an example of the aligned - * data: - * - * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f - * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a - * 00000000 00000000 00000000 00000000 - * - * Here's one interpretation of how the data is laid out. - * - * struct unknown { - * u32 field1; (always 0x00000001) - * u32 field2; (always 0x40000000) - * u8 data[48]; (???) 
- * u64 reserved1; (always 0) - * u64 reserved2; (always 0) - * };*/ -#if 0 - if (dentry->length - calculated_size >= WIM_ADS_ENTRY_DISK_SIZE) { - printf("%s: %lu / %lu (", file_name_utf8, - calculated_size, dentry->length); - print_string(p + WIM_ADS_ENTRY_DISK_SIZE, dentry->length - calculated_size - WIM_ADS_ENTRY_DISK_SIZE); - puts(")"); - print_byte_field(p, dentry->length - calculated_size); - putchar('\n'); + p = get_bytes(p, short_name_len, short_name); + if (p[0] || p[1]) + WARNING("Expected two zero bytes following the short name of " + "`%s', but found non-zero bytes", file_name_utf8 ? file_name_utf8 : ""); + p += 2; } -#endif /* * Read the alternate data streams, if present. dentry->num_ads tells @@ -1149,7 +1231,6 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, * included in the dentry->length field for some reason. */ if (dentry->num_ads != 0) { - calculated_size = (calculated_size + 7) & ~7; if (calculated_size > metadata_resource_len - offset) { ERROR("Not enough space in metadata resource for " "alternate stream entries"); @@ -1195,7 +1276,13 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) unsigned padding; const u8 *hash; - p = put_u64(p, dentry->length); + /* We calculate the correct length of the dentry ourselves because the + * dentry->length field may have been set to an unexpected value from when we + * read the dentry in (for example, there may have been unknown data + * appended to the end of the dentry...) */ + u64 length = dentry_correct_length(dentry); + + p = put_u64(p, length); p = put_u32(p, dentry->attributes); p = put_u32(p, dentry->security_id); p = put_u64(p, dentry->subdir_offset); @@ -1225,16 +1312,19 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) p = put_u16(p, dentry->num_ads); p = put_u16(p, dentry->short_name_len); p = put_u16(p, dentry->file_name_len); - p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); - p = put_u16(p, 0); /* filename padding, 2 bytes. 
*/ - p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); - - wimlib_assert(p - orig_p <= dentry->length); - if (p - orig_p < dentry->length) - p = put_zeroes(p, dentry->length - (p - orig_p)); + if (dentry->file_name_len) { + p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); + p = put_u16(p, 0); /* filename padding, 2 bytes. */ + } + if (dentry->short_name_len) { + p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); + p = put_u16(p, 0); /* short name padding, 2 bytes */ + } /* Align to 8-byte boundary */ - p = put_zeroes(p, (8 - dentry->length % 8) % 8); + wimlib_assert(length >= (p - orig_p) + && length - (p - orig_p) <= 7); + p = put_zeroes(p, length - (p - orig_p)); /* Write the alternate data streams, if there are any. Please see * read_ads_entries() for comments about the format of the on-disk @@ -1248,11 +1338,16 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p) hash = dentry->ads_entries[i].hash; p = put_bytes(p, SHA1_HASH_SIZE, hash); p = put_u16(p, dentry->ads_entries[i].stream_name_len); - p = put_bytes(p, dentry->ads_entries[i].stream_name_len, - (u8*)dentry->ads_entries[i].stream_name); - p = put_u16(p, 0); + if (dentry->ads_entries[i].stream_name_len) { + p = put_bytes(p, dentry->ads_entries[i].stream_name_len, + (u8*)dentry->ads_entries[i].stream_name); + p = put_u16(p, 0); + } p = put_zeroes(p, (8 - (p - orig_p) % 8) % 8); } +#ifdef ENABLE_ASSERTIONS + wimlib_assert(p - orig_p == __dentry_total_length(dentry, length)); +#endif return p; } @@ -1394,7 +1489,11 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, break; } - /* Advance to the offset of the next child. */ + /* Advance to the offset of the next child. Note: We need to + * advance by the TOTAL length of the dentry, not by the length + * child->length, which although it does take into account the + * padding, it DOES NOT take into account alternate stream + * entries. 
*/ cur_offset += dentry_total_length(child); }