xml.c: Simplify image statistics calculation
author Eric Biggers <ebiggers3@gmail.com>
Fri, 1 May 2015 03:15:05 +0000 (22:15 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Fri, 1 May 2015 05:03:28 +0000 (00:03 -0500)
NEWS
src/xml.c

diff --git a/NEWS b/NEWS
index 7b4a0e37ee11b7bf7e78f83197a998f4354e2c89..644d10f120bbef6b559822d6e2700be58ba9f499 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -21,6 +21,10 @@ Version 1.8.1-BETA:
        The '--enable-verify-compression' configure option has been removed.  If
        you want to verify a WIM file, use the 'wimverify' program.
 
+       The way the "file count", "directory count", "total bytes", and "hard
+       link bytes" image statistics (stored in the WIM XML data) are calculated
+       has been slightly changed.
+
 Version 1.8.0:
        Improved the LZX compressor.  It is now 15-20% faster than before and
        provides a slightly better compression ratio.
diff --git a/src/xml.c b/src/xml.c
index 279c3382366a7676471c73ed1ded936ed6b8606a..efd339c1d3771bdf3b2de5c83b3be454502c9560 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1163,79 +1163,25 @@ xml_set_memory_allocator(void *(*malloc_func)(size_t),
 }
 
 static int
-calculate_dentry_statistics(struct wim_dentry *dentry, void *arg)
+calculate_dentry_statistics(struct wim_dentry *dentry, void *_info)
 {
-       struct image_info *info = arg;
+       struct image_info *info = _info;
        const struct wim_inode *inode = dentry->d_inode;
 
-       /* Update directory count and file count.
-        *
-        * Each dentry counts as either a file or a directory, but not both.
-        * The root directory is an exception: it is not counted at all.
-        *
-        * Symbolic links and junction points (and presumably other reparse
-        * points) count as regular files.  This is despite the fact that
-        * junction points have FILE_ATTRIBUTE_DIRECTORY set.
-        */
-
-       if (!dentry_is_root(dentry)) {
-               if (inode_is_directory(inode))
-                       info->dir_count++;
-               else
-                       info->file_count++;
-       }
+       if (inode_is_directory(inode))
+               info->dir_count++;
+       else
+               info->file_count++;
 
-       /*
-        * Update total bytes and hard link bytes.
-        *
-        * We try to act the same as the MS implementation, even though there
-        * are some inconsistencies/bugs in the way it operates.
-        *
-        * If there are no alternate data streams in the image, the "total
-        * bytes" is the sum of the size of the un-named data stream of each
-        * inode times the link count of that inode.  In other words, it would
-        * be the total number of bytes of regular files you would have if you
-        * extracted the full image without any hard-links.  The "hard link
-        * bytes" is equal to the "total bytes" minus the size of the un-named
-        * data stream of each inode.  In other words, the "hard link bytes"
-        * counts the size of the un-named data stream for all the links to each
-        * inode except the first one.
-        *
-        * Reparse points and directories don't seem to be counted in either the
-        * total bytes or the hard link bytes.
-        *
-        * And now we get to the most confusing part, the alternate data
-        * streams.  They are not counted in the "total bytes".  However, if the
-        * link count of an inode with alternate data streams is 2 or greater,
-        * the size of all the alternate data streams is included in the "hard
-        * link bytes", and this size is multiplied by the link count (NOT one
-        * less than the link count).
-        */
-       if (!(inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
-                                    FILE_ATTRIBUTE_REPARSE_POINT)))
-       {
-               struct blob_descriptor *blob;
-
-               blob = inode_get_blob_for_unnamed_data_stream(inode,
-                                                             info->blob_table);
-               if (blob) {
-                       info->total_bytes += blob->size;
-                       if (!dentry_is_first_in_inode(dentry))
-                               info->hard_link_bytes += blob->size;
-               }
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               const struct blob_descriptor *blob;
 
-               if (inode->i_nlink >= 2 && dentry_is_first_in_inode(dentry)) {
-                       for (unsigned i = 0; i < inode->i_num_streams; i++) {
-                               if (stream_is_named_data_stream(&inode->i_streams[i])) {
-                                       blob = stream_blob(&inode->i_streams[i],
-                                                          info->blob_table);
-                                       if (blob) {
-                                               info->hard_link_bytes += inode->i_nlink *
-                                                                        blob->size;
-                                       }
-                               }
-                       }
-               }
+               blob = stream_blob(&inode->i_streams[i], info->blob_table);
+               if (!blob)
+                       continue;
+               info->total_bytes += blob->size;
+               if (!dentry_is_first_in_inode(dentry))
+                       info->hard_link_bytes += blob->size;
        }
        return 0;
 }
@@ -1243,6 +1189,9 @@ calculate_dentry_statistics(struct wim_dentry *dentry, void *arg)
 /*
  * Calculate what to put in the <FILECOUNT>, <DIRCOUNT>, <TOTALBYTES>, and
  * <HARDLINKBYTES> elements of the specified WIM image.
+ *
+ * Note: since these stats are likely to be used for display purposes only, we
+ * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
  */
 void
 xml_update_image_info(WIMStruct *wim, int image)