]> wimlib.net Git - wimlib/commitdiff
More ADS and dentry alignment and padding issues
authorEric Biggers <ebiggers3@gmail.com>
Tue, 28 Aug 2012 03:12:52 +0000 (22:12 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Tue, 28 Aug 2012 03:12:52 +0000 (22:12 -0500)
So... I think that ALL the filename fields (short name, file name, stream name)
actually have a 2-byte null terminator, but NOT if that field has zero length.
This is now taken into account.

A warning is now issued if the dentry length is unexpectedly large.

We will now write all dentries with the "canonical" length regardless of what
dentry length was read in (as we are throwing out whatever junk was appended to
it anyway...).

src/dentry.c
src/dentry.h
src/hardlink.c

index 9dfd5afab7c6b62133ede89fcbdb5b6b64a55b3b..6d104ee5a990d843d13c5d508cb621e95836f1bd 100644 (file)
@@ -54,14 +54,42 @@ static bool dentry_has_name(const struct dentry *dentry, const char *name,
        return memcmp(dentry->file_name_utf8, name, name_len) == 0;
 }
 
+static u64 __dentry_correct_length_unaligned(u16 file_name_len,
+                                            u16 short_name_len)
+{
+       u64 length = WIM_DENTRY_DISK_SIZE;
+       if (file_name_len)
+               length += file_name_len + 2;
+       if (short_name_len)
+               length += short_name_len + 2;
+       return length;
+}
+
+static u64 dentry_correct_length_unaligned(const struct dentry *dentry)
+{
+       return __dentry_correct_length_unaligned(dentry->file_name_len,
+                                                dentry->short_name_len);
+}
+
+/* Return the "correct" value to write in the length field of the dentry, based
+ * on the file name length and short name length */
+static u64 dentry_correct_length(const struct dentry *dentry)
+{
+       return (dentry_correct_length_unaligned(dentry) + 7) & ~7;
+}
+
+static u64 __dentry_total_length(const struct dentry *dentry, u64 length)
+{
+       for (u16 i = 0; i < dentry->num_ads; i++)
+               length += ads_entry_total_length(&dentry->ads_entries[i]);
+       return (length + 7) & ~7;
+}
+
 /* Real length of a dentry, including the alternate data stream entries, which
  * are not included in the dentry->length field... */
 u64 dentry_total_length(const struct dentry *dentry)
 {
-       u64 length = (dentry->length + 7) & ~7;
-       for (u16 i = 0; i < dentry->num_ads; i++)
-               length += ads_entry_total_length(&dentry->ads_entries[i]);
-       return length;
+       return __dentry_total_length(dentry, dentry->length);
 }
 
 /* Transfers file attributes from a `stat' buffer to a struct dentry. */
@@ -313,7 +341,8 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p)
                /* Advance the subdir offset by the amount of space the children
                 * of this dentry take up. */
                do {
-                       *subdir_offset_p += dentry_total_length(child);
+                       *subdir_offset_p += __dentry_total_length(child,
+                                                                 dentry_correct_length(child));
                        child = child->next;
                } while (child != dentry->children);
 
@@ -722,17 +751,6 @@ void unlink_dentry(struct dentry *dentry)
        }
 }
 
-
-/* Recalculates the length of @dentry based on its file name length and short
- * name length.  */
-static inline void recalculate_dentry_size(struct dentry *dentry)
-{
-       dentry->length = WIM_DENTRY_DISK_SIZE + dentry->file_name_len + 
-                        2 + dentry->short_name_len;
-       /* Must be multiple of 8. */
-       dentry->length = (dentry->length + 7) & ~7;
-}
-
 /* Duplicates a UTF-8 name into UTF-8 and UTF-16 strings and returns the strings
  * and their lengths in the pointer arguments */
 int get_names(char **name_utf16_ret, char **name_utf8_ret,
@@ -776,7 +794,7 @@ int change_dentry_name(struct dentry *dentry, const char *new_name)
                        &dentry->file_name_len, &dentry->file_name_utf8_len,
                         new_name);
        if (ret == 0)
-               recalculate_dentry_size(dentry);
+               dentry->length = dentry_correct_length(dentry);
        return ret;
 }
 
@@ -859,15 +877,21 @@ void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *tab
  *
  * struct ads_entry_on_disk {
  *     u64  length;          // Length of the entry, in bytes.  This includes
- *                                 all fields (including the stream name, the
- *                                 null terminator, AND the padding!).
+ *                                 all fields (including the stream name and 
+ *                                 null terminator if present, AND the padding!).
  *     u64  reserved;        // Seems to be unused
  *     u8   hash[20];        // SHA1 message digest of the uncompressed stream
  *     u16  stream_name_len; // Length of the stream name, in bytes
  *     char stream_name[];   // Stream name in UTF-16LE, @stream_name_len bytes long,
  *                                  not including null terminator
  *     u16  zero;            // UTF-16 null terminator for the stream name, NOT
- *                                 included in @stream_name_len
+ *                                 included in @stream_name_len.  Based on what
+ *                                 I've observed from filenames in dentries,
+ *                                 this field should not exist when
+ *                                 (@stream_name_len == 0), but you can't
+ *                                 actually tell because of the padding anyway
+ *                                 (provided that the padding is zeroed, which
+ *                                 it always seems to be).
  *     char padding[];       // Padding to make the size a multiple of 8 bytes.
  * };
  *
@@ -909,7 +933,7 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry,
                }
 
                p = get_u64(p, &length);
-               p += 8;
+               p += 8; /* Skip the reserved field */
                p = get_bytes(p, SHA1_HASH_SIZE, (u8*)cur_entry->hash);
                p = get_u16(p, &cur_entry->stream_name_len);
 
@@ -929,7 +953,7 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry,
                if (remaining_size < length_no_padding) {
                        ERROR("Stream entries go past end of metadata resource");
                        ERROR("(remaining_size = %"PRIu64" bytes, "
-                             "length_no_padding = %"PRIu16" bytes",
+                             "length_no_padding = %"PRIu16" bytes)",
                              remaining_size, length_no_padding);
                        ret = WIMLIB_ERR_INVALID_DENTRY;
                        goto out_free_ads_entries;
@@ -951,21 +975,23 @@ static int read_ads_entries(const u8 *p, struct dentry *dentry,
                        goto out_free_ads_entries;
                }
 
-               cur_entry->stream_name = MALLOC(cur_entry->stream_name_len);
-               if (!cur_entry->stream_name) {
-                       ret = WIMLIB_ERR_NOMEM;
-                       goto out_free_ads_entries;
-               }
-               get_bytes(p, cur_entry->stream_name_len,
-                         (u8*)cur_entry->stream_name);
-               cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name,
-                                                           cur_entry->stream_name_len,
-                                                           &utf8_len);
-               cur_entry->stream_name_utf8_len = utf8_len;
-
-               if (!cur_entry->stream_name_utf8) {
-                       ret = WIMLIB_ERR_NOMEM;
-                       goto out_free_ads_entries;
+               if (cur_entry->stream_name_len) {
+                       cur_entry->stream_name = MALLOC(cur_entry->stream_name_len);
+                       if (!cur_entry->stream_name) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_ads_entries;
+                       }
+                       get_bytes(p, cur_entry->stream_name_len,
+                                 (u8*)cur_entry->stream_name);
+                       cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name,
+                                                                   cur_entry->stream_name_len,
+                                                                   &utf8_len);
+                       cur_entry->stream_name_utf8_len = utf8_len;
+
+                       if (!cur_entry->stream_name_utf8) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_ads_entries;
+                       }
                }
                /* It's expected that the size of every ADS entry is a multiple
                 * of 8.  However, to be safe, I'm allowing the possibility of
@@ -1012,9 +1038,9 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len,
 {
        const u8 *p;
        u64 calculated_size;
-       char *file_name;
-       char *file_name_utf8;
-       char *short_name;
+       char *file_name = NULL;
+       char *file_name_utf8 = NULL;
+       char *short_name = NULL;
        u16 short_name_len;
        u16 file_name_len;
        size_t file_name_utf8_len;
@@ -1100,92 +1126,100 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len,
        p = get_u16(p, &short_name_len);
        p = get_u16(p, &file_name_len);
 
-       /* We now know the length of the file name and short name.  These should
-        * be included in the dentry length, but make sure the numbers are
-        * consistent. */
-       calculated_size = WIM_DENTRY_DISK_SIZE + file_name_len + 2 +
-                         short_name_len;
+       /* We now know the length of the file name and short name.  Make sure
+        * the length of the dentry is large enough to actually hold them. 
+        *
+        * The calculated length here is unaligned to allow for the possibility
+        * that the dentry->length names an unaligned length, although this
+        * would be unexpected. */
+       calculated_size = __dentry_correct_length_unaligned(file_name_len,
+                                                           short_name_len);
 
        if (dentry->length < calculated_size) {
                ERROR("Unexpected end of directory entry! (Expected "
-                     "%"PRIu64" bytes, got %"PRIu64" bytes. "
+                     "at least %"PRIu64" bytes, got %"PRIu64" bytes. "
                      "short_name_len = %hu, file_name_len = %hu)", 
                      calculated_size, dentry->length,
                      short_name_len, file_name_len);
                return WIMLIB_ERR_INVALID_DENTRY;
        }
 
-       /* Read the filename. */
-       file_name = MALLOC(file_name_len);
-       if (!file_name) {
-               ERROR("Failed to allocate %hu bytes for dentry file name",
-                     file_name_len);
-               return WIMLIB_ERR_NOMEM;
-       }
-       p = get_bytes(p, file_name_len, file_name);
+       /* Read the filename if present.  Note: if the filename is empty, there
+        * is no null terminator following it. */
+       if (file_name_len) {
+               file_name = MALLOC(file_name_len);
+               if (!file_name) {
+                       ERROR("Failed to allocate %hu bytes for dentry file name",
+                             file_name_len);
+                       return WIMLIB_ERR_NOMEM;
+               }
+               p = get_bytes(p, file_name_len, file_name);
 
-       /* Convert filename to UTF-8. */
-       file_name_utf8 = utf16_to_utf8(file_name, file_name_len, 
-                                      &file_name_utf8_len);
+               /* Convert filename to UTF-8. */
+               file_name_utf8 = utf16_to_utf8(file_name, file_name_len, 
+                                              &file_name_utf8_len);
 
-       if (!file_name_utf8) {
-               ERROR("Failed to allocate memory to convert UTF-16 "
-                     "filename (%hu bytes) to UTF-8", file_name_len);
-               ret = WIMLIB_ERR_NOMEM;
-               goto out_free_file_name;
+               if (!file_name_utf8) {
+                       ERROR("Failed to allocate memory to convert UTF-16 "
+                             "filename (%hu bytes) to UTF-8", file_name_len);
+                       ret = WIMLIB_ERR_NOMEM;
+                       goto out_free_file_name;
+               }
+               if (*(u16*)p)
+                       WARNING("Expected two zero bytes following the file name "
+                               "`%s', but found non-zero bytes", file_name_utf8);
+               p += 2;
        }
 
-       /* Before the short name begins, there is a null terminator of two zero
-        * bytes that follow the long filename, even if the long file name is of
-        * zero length. */
-
-       /* XXX There seems to be no null terminator following the short name;
-        * verify this. */
-       if (*(u16*)p)
-               WARNING("Expected two zero bytes following the file name "
-                       "`%s', but found non-zero bytes", file_name_utf8);
-       p += 2;
-
-       /* Read the short filename. */
-       short_name = MALLOC(short_name_len);
-       if (!short_name) {
-               ERROR("Failed to allocate %hu bytes for short filename",
-                     short_name_len);
-               ret = WIMLIB_ERR_NOMEM;
-               goto out_free_file_name_utf8;
+       /* Align the calculated size */
+       calculated_size = (calculated_size + 7) & ~7;
+
+       if (dentry->length > calculated_size) {
+               /* Weird; the dentry says it's longer than it should be.  Note
+                * that the length field does NOT include the size of the
+                * alternate stream entries. */
+
+               /* Strangely, some directory entries inexplicably have a little
+                * over 70 bytes of extra data.  The exact amount of data seems
+                * to be 72 bytes, but it is aligned on the next 8-byte
+                * boundary.  It does NOT seem to be alternate data stream
+                * entries.  Here's an example of the aligned data:
+                *
+                * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f
+                * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a
+                * 00000000 00000000 00000000 00000000
+                *
+                * Here's one interpretation of how the data is laid out.
+                *
+                * struct unknown {
+                *      u32 field1; (always 0x00000001)
+                *      u32 field2; (always 0x40000000)
+                *      u8  data[48]; (???)
+                *      u64 reserved1; (always 0)
+                *      u64 reserved2; (always 0)
+                * };*/
+               WARNING("Dentry for file or directory `%s' has %zu extra "
+                       "bytes of data",
+                       file_name_utf8, dentry->length - calculated_size);
        }
 
-       p = get_bytes(p, short_name_len, short_name);
+       /* Read the short filename if present.  Note: if there is no short
+        * filename, there is no null terminator following it. */
+       if (short_name_len) {
+               short_name = MALLOC(short_name_len);
+               if (!short_name) {
+                       ERROR("Failed to allocate %hu bytes for short filename",
+                             short_name_len);
+                       ret = WIMLIB_ERR_NOMEM;
+                       goto out_free_file_name_utf8;
+               }
 
-       /* Some directory entries inexplicably have a little over 70 bytes of
-        * extra data.  The exact amount of data seems to be 72 bytes, but it is
-        * aligned on the next 8-byte boundary.  It does NOT seem to be
-        * alternate data stream entries.  Here's an example of the aligned
-        * data:
-        *
-        * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f
-        * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a
-        * 00000000 00000000 00000000 00000000
-        *
-        * Here's one interpretation of how the data is laid out.
-        *
-        * struct unknown {
-        *      u32 field1; (always 0x00000001)
-        *      u32 field2; (always 0x40000000)
-        *      u8  data[48]; (???)
-        *      u64 reserved1; (always 0)
-        *      u64 reserved2; (always 0)
-        * };*/
-#if 0
-       if (dentry->length - calculated_size >= WIM_ADS_ENTRY_DISK_SIZE) {
-               printf("%s: %lu / %lu (", file_name_utf8, 
-                               calculated_size, dentry->length);
-               print_string(p + WIM_ADS_ENTRY_DISK_SIZE, dentry->length - calculated_size - WIM_ADS_ENTRY_DISK_SIZE);
-               puts(")");
-               print_byte_field(p, dentry->length - calculated_size);
-               putchar('\n');
+               p = get_bytes(p, short_name_len, short_name);
+               if (*(u16*)p)
+                       WARNING("Expected two zero bytes following the file name "
+                               "`%s', but found non-zero bytes", file_name_utf8);
+               p += 2;
        }
-#endif
 
        /* 
         * Read the alternate data streams, if present.  dentry->num_ads tells
@@ -1197,7 +1231,6 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len,
         * included in the dentry->length field for some reason.
         */
        if (dentry->num_ads != 0) {
-               calculated_size = (calculated_size + 7) & ~7;
                if (calculated_size > metadata_resource_len - offset) {
                        ERROR("Not enough space in metadata resource for "
                              "alternate stream entries");
@@ -1243,7 +1276,13 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p)
        unsigned padding;
        const u8 *hash;
 
-       p = put_u64(p, dentry->length);
+       /* We calculate the correct length of the dentry ourselves because the
+        * dentry->length field may been set to an unexpected value from when we
+        * read the dentry in (for example, there may have been unknown data
+        * appended to the end of the dentry...) */
+       u64 length = dentry_correct_length(dentry);
+
+       p = put_u64(p, length);
        p = put_u32(p, dentry->attributes);
        p = put_u32(p, dentry->security_id);
        p = put_u64(p, dentry->subdir_offset);
@@ -1273,16 +1312,19 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p)
        p = put_u16(p, dentry->num_ads);
        p = put_u16(p, dentry->short_name_len);
        p = put_u16(p, dentry->file_name_len);
-       p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name);
-       p = put_u16(p, 0); /* filename padding, 2 bytes. */
-       p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name);
-
-       wimlib_assert(p - orig_p <= dentry->length);
-       if (p - orig_p < dentry->length)
-               p = put_zeroes(p, dentry->length - (p - orig_p));
+       if (dentry->file_name_len) {
+               p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name);
+               p = put_u16(p, 0); /* filename padding, 2 bytes. */
+       }
+       if (dentry->short_name) {
+               p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name);
+               p = put_u16(p, 0); /* short name padding, 2 bytes */
+       }
 
        /* Align to 8-byte boundary */
-       p = put_zeroes(p, (8 - dentry->length % 8) % 8);
+       wimlib_assert(length >= (p - orig_p)
+                       && length - (p - orig_p) <= 7);
+       p = put_zeroes(p, length - (p - orig_p));
 
        /* Write the alternate data streams, if there are any.  Please see
         * read_ads_entries() for comments about the format of the on-disk
@@ -1296,11 +1338,16 @@ static u8 *write_dentry(const struct dentry *dentry, u8 *p)
                        hash = dentry->ads_entries[i].hash;
                p = put_bytes(p, SHA1_HASH_SIZE, hash);
                p = put_u16(p, dentry->ads_entries[i].stream_name_len);
-               p = put_bytes(p, dentry->ads_entries[i].stream_name_len,
-                                (u8*)dentry->ads_entries[i].stream_name);
-               p = put_u16(p, 0);
+               if (dentry->ads_entries[i].stream_name_len) {
+                       p = put_bytes(p, dentry->ads_entries[i].stream_name_len,
+                                        (u8*)dentry->ads_entries[i].stream_name);
+                       p = put_u16(p, 0);
+               }
                p = put_zeroes(p, (8 - (p - orig_p) % 8) % 8);
        }
+#ifdef ENABLE_ASSERTIONS
+       wimlib_assert(p - orig_p == __dentry_total_length(dentry, length));
+#endif
        return p;
 }
 
@@ -1442,7 +1489,11 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
                                break;
                }
 
-               /* Advance to the offset of the next child. */
+               /* Advance to the offset of the next child.  Note: We need to
+                * advance by the TOTAL length of the dentry, not by the length
+                * child->length, which although it does take into account the
+                * padding, it DOES NOT take into account alternate stream
+                * entries. */
                cur_offset += dentry_total_length(child);
        }
 
index be0b058605c0a6e869cd9dfdfa0410454ce92fb8..74776420351f0dba418d387a060fee720decb4b6 100644 (file)
@@ -89,7 +89,9 @@ struct ads_entry {
  * entry to align the next one (or the next dentry) on an 8-byte boundary. */
 static inline u64 ads_entry_total_length(const struct ads_entry *entry)
 {
-       u64 len = WIM_ADS_ENTRY_DISK_SIZE + entry->stream_name_len + 2;
+       u64 len = WIM_ADS_ENTRY_DISK_SIZE;
+       if (entry->stream_name_len)
+               len += entry->stream_name_len + 2;
        return (len + 7) & ~7;
 }
 
@@ -155,7 +157,11 @@ struct dentry {
         * The length here includes the base directory entry on disk as well as
         * the long and short filenames.  It does NOT include any alternate
         * stream entries that may follow the directory entry, even though the
-        * size of those needs to be considered.
+        * size of those needs to be considered.  The length SHOULD be 8-byte
+        * aligned, although we don't require it to be.  We do require the
+        * length to be large enough to hold the file name(s) of the dentry;
+        * additionally, a warning is issued if this field is larger than the
+        * aligned size.
         */
        u64 length;
 
index f5d3e1c59d91f31115843d2b649dcc2b81c10bc8..0ceafb8240f280f937833a3738006d358eeb6dc1 100644 (file)
@@ -296,8 +296,8 @@ static int share_dentry_ads(struct dentry *owner, struct dentry *user)
        user->ads_entries_status = ADS_ENTRIES_USER;
        return 0;
 mismatch:
-       WARNING("Dentries `%s' and `%s' in the same hard-link group but "
-               "do not share the same %s",
+       WARNING("Dentries `%s' and `%s' are supposedly in the same hard-link "
+               "group but do not share the same %s",
                owner->full_path_utf8, user->full_path_utf8,
                mismatch_type);
        return WIMLIB_ERR_INVALID_DENTRY;