X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fextract.c;h=b59e22352f81f6078e8fc224a4df322817b12628;hb=88afc46984c143cbf928a3186f8cc93d819e9269;hp=769abbe15235551b9d93a4f5647d0b66b133024d;hpb=5ea71b35aff264dea029792b1fdb3404c6bd9d54;p=wimlib

diff --git a/src/extract.c b/src/extract.c
index 769abbe1..b59e2235 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -63,6 +63,7 @@
 #include "wimlib/unix_data.h"
 #include "wimlib/wim.h"
 #include "wimlib/win32.h" /* for realpath() equivalent */
+#include "wimlib/xattr.h"
 #include "wimlib/xml.h"
 
 #define WIMLIB_EXTRACT_FLAG_FROM_PIPE   0x80000000
@@ -141,6 +142,70 @@ end_file_metadata_phase(struct apply_ctx *ctx)
 	return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
 }
 
+/* Return true iff all @size bytes at @p are zero (true for an empty buffer). */
+static bool
+is_all_zeroes(const u8 *p, const size_t size)
+{
+	const u8 * const end = p + size;
+	/* Head: check byte by byte until p is WORDBYTES-aligned (or at end). */
+	for (; (uintptr_t)p % WORDBYTES && p != end; p++)
+		if (*p)
+			return false;
+	/* Middle: check one machine word at a time while a full word remains. */
+	for (; end - p >= WORDBYTES; p += WORDBYTES)
+		if (*(const machine_word_t *)p)
+			return false;
+	/* Tail: check the remaining (fewer than WORDBYTES) bytes one by one. */
+	for (; p != end; p++)
+		if (*p)
+			return false;
+
+	return true;
+}
+
+/*
+ * Sparse regions should be detected at the granularity of the filesystem block
+ * size.  For now just assume 4096 bytes, which is the default block size on
+ * NTFS and most Linux filesystems.
+ */
+#define SPARSE_UNIT 4096	/* in bytes; step size used by detect_sparse_region() */
+
+/*
+ * Detect whether the specified buffer begins with a region of all zero bytes.
+ * Return %true if a zero region was found or %false if a nonzero region was
+ * found (also %false if @size is 0), and set *len_ret to the region's length
+ * in bytes.  This works in units of SPARSE_UNIT bytes (the last unit may be
+ * shorter): a zero region is extended only by a unit that is entirely zero,
+ * while one nonzero byte in the next unit suffices to extend a nonzero region.
+ *
+ * Note: besides compression, the WIM format doesn't yet have a way to
+ * efficiently represent zero regions, so that's why we need to detect them
+ * ourselves.  Things will still fall apart badly on extremely large sparse
+ * files, but this is a start...
+ */
+bool
+detect_sparse_region(const void *data, size_t size, size_t *len_ret)
+{
+	const void *p = data;
+	const void * const end = data + size;
+	size_t len = 0;
+	bool zeroes = false;
+
+	while (p != end) {
+		size_t n = min(end - p, SPARSE_UNIT);
+		bool z = is_all_zeroes(p, n);
+		/* The first unit fixes the region type; stop once it changes. */
+		if (len != 0 && z != zeroes)
+			break;
+		zeroes = z;
+		len += n;
+		p += n;
+	}
+
+	*len_ret = len;
+	return zeroes;
+}
+
 #define PWM_FOUND_WIM_HDR (-1)
 
 /* Read the header for a blob in a pipable WIM.  If @pwm_hdr_ret is not NULL,
@@ -356,13 +421,10 @@ retry:
 }
 
 static int
-begin_extract_blob_wrapper(struct blob_descriptor *blob, void *_ctx)
+begin_extract_blob(struct blob_descriptor *blob, void *_ctx)
 {
 	struct apply_ctx *ctx = _ctx;
 
-	ctx->cur_blob = blob;
-	ctx->cur_blob_offset = 0;
-
 	if (unlikely(blob->out_refcnt > MAX_OPEN_FILES))
 		return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name);
 
@@ -370,29 +432,29 @@ begin_extract_blob_wrapper(struct blob_descriptor *blob, void *_ctx)
 }
 
 static int
-extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
+extract_chunk(const struct blob_descriptor *blob, u64 offset,
+	      const void *chunk, size_t size, void *_ctx)
 {
 	struct apply_ctx *ctx = _ctx;
 	union wimlib_progress_info *progress = &ctx->progress;
+	bool last = (offset + size == blob->size);
 	int ret;
 
-	ctx->cur_blob_offset += size;
-
 	if (likely(ctx->supported_features.hard_links)) {
 		progress->extract.completed_bytes +=
-			(u64)size * ctx->cur_blob->out_refcnt;
-		if (ctx->cur_blob_offset == ctx->cur_blob->size)
-			progress->extract.completed_streams += ctx->cur_blob->out_refcnt;
+			(u64)size * blob->out_refcnt;
+		if (last)
+			progress->extract.completed_streams += blob->out_refcnt;
 	} else {
 		const struct blob_extraction_target *targets =
-			blob_extraction_targets(ctx->cur_blob);
-		for (u32 i = 0; i < ctx->cur_blob->out_refcnt; i++) {
+			blob_extraction_targets(blob);
+		for (u32 i = 0; i < blob->out_refcnt; i++) {
 			const struct wim_inode *inode = targets[i].inode;
 			const struct wim_dentry *dentry;
 
 			inode_for_each_extraction_alias(dentry, inode) {
 				progress->extract.completed_bytes += size;
-				if (ctx->cur_blob_offset == ctx->cur_blob->size)
+				if (last)
 					progress->extract.completed_streams++;
 			}
 		}
@@ -419,7 +481,7 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
 		return ret;
 	}
 
-	return call_consume_chunk(chunk, size, ctx->saved_cbs);
+	return call_continue_blob(blob, offset, chunk, size, ctx->saved_cbs);
 }
 
 /* Copy the blob's data from the temporary file to each of its targets.
@@ -450,7 +512,7 @@ extract_from_tmpfile(const tchar *tmpfile_name,
 }
 
 static int
-end_extract_blob_wrapper(struct blob_descriptor *blob, int status, void *_ctx)
+end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
 {
 	struct apply_ctx *ctx = _ctx;
 
@@ -489,9 +551,9 @@ int
 extract_blob_list(struct apply_ctx *ctx, const struct read_blob_callbacks *cbs)
 {
 	struct read_blob_callbacks wrapper_cbs = {
-		.begin_blob	= begin_extract_blob_wrapper,
-		.consume_chunk	= extract_chunk_wrapper,
-		.end_blob	= end_extract_blob_wrapper,
+		.begin_blob	= begin_extract_blob,
+		.continue_blob	= extract_chunk,
+		.end_blob	= end_extract_blob,
 		.ctx		= ctx,
 	};
 	ctx->saved_cbs = cbs;
@@ -761,9 +823,7 @@ dentry_calculate_extraction_name(struct wim_dentry *dentry,
 
 	if (!ctx->supported_features.case_sensitive_filenames) {
 		struct wim_dentry *other;
-		list_for_each_entry(other, &dentry->d_ci_conflict_list,
-				    d_ci_conflict_list)
-		{
+		dentry_for_each_ci_match(other, dentry) {
 			if (will_extract_dentry(other)) {
 				if (ctx->extract_flags &
 				    WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) {
@@ -1165,6 +1225,8 @@ inode_tally_features(const struct wim_inode *inode,
 		features->unix_data++;
 	if (inode_has_object_id(inode))
 		features->object_ids++;
+	if (inode_has_linux_xattrs(inode))
+		features->linux_xattrs++;
 }
 
 /* Tally features necessary to extract a dentry and the corresponding inode.  */
@@ -1302,21 +1364,30 @@ do_feature_check(const struct wim_features *required_features,
 		WARNING("Ignoring Windows NT security descriptors of %lu files",
 			required_features->security_descriptors);
 
-	/* UNIX data.  */
+	/* Standard UNIX metadata */
 	if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
 	    required_features->unix_data && !supported_features->unix_data)
 	{
-		ERROR("Extraction backend does not support UNIX data!");
+		ERROR("Requested UNIX metadata extraction, but extraction "
+		      "backend does not support it!");
 		return WIMLIB_ERR_UNSUPPORTED;
 	}
-
 	if (required_features->unix_data &&
 	    !(extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA))
 	{
-		WARNING("Ignoring UNIX metadata of %lu files",
+		WARNING("Ignoring UNIX metadata (uid/gid/mode/rdev) of %lu files",
 			required_features->unix_data);
 	}
 
+	/* Linux-style extended attributes */
+	if (required_features->linux_xattrs &&
+	    (!supported_features->linux_xattrs ||
+	     !(extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA)))
+	{
+		WARNING("Ignoring Linux-style extended attributes of %lu files",
+			required_features->linux_xattrs);
+	}
+
 	/* Object IDs.  */
 	if (required_features->object_ids && !supported_features->object_ids) {
 		WARNING("Ignoring object IDs of %lu files",
@@ -1756,13 +1827,11 @@ extract_single_image(WIMStruct *wim, int image,
 }
 
 static const tchar * const filename_forbidden_chars =
-T(
 #ifdef __WIN32__
-"<>:\"/\\|?*"
+T("<>:\"/\\|?*");
 #else
-"/"
+T("/");
 #endif
-);
 
 /* This function checks if it is okay to use a WIM image's name as a directory
  * name.  */