extract.c: Fix for running out of file handles
authorEric Biggers <ebiggers3@gmail.com>
Fri, 20 Jun 2014 04:58:43 +0000 (23:58 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Fri, 20 Jun 2014 04:58:43 +0000 (23:58 -0500)
NEWS
include/wimlib/apply.h
include/wimlib/resource.h
include/wimlib_tchar.h
src/extract.c
src/ntfs-3g_apply.c
src/resource.c
src/unix_apply.c
src/win32_apply.c

diff --git a/NEWS b/NEWS
index 30558f4..e29d8ba 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+Version 1.7.1-BETA:
+       The new (as of v1.7.0) extraction code will no longer run out of file
+       handles when extracting many (1000+) identical files.
+
+       Library users can now initialize and de-initialize the library multiple
+       times in one run of an application program.
+
 Version 1.7.0:
        Improved compression, decompression, and extraction performance.
 
index 6c595d5..9f5f86b 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _WIMLIB_APPLY_H
 #define _WIMLIB_APPLY_H
 
+#include "wimlib/file_io.h"
 #include "wimlib/list.h"
 #include "wimlib/progress.h"
 #include "wimlib/types.h"
@@ -66,8 +67,15 @@ struct apply_ctx {
        struct list_head stream_list;
        const struct read_stream_list_callbacks *saved_cbs;
        struct wim_lookup_table_entry *cur_stream;
+       struct filedes tmpfile_fd;
+       tchar *tmpfile_name;
 };
 
+/* Maximum number of UNIX file descriptors, NTFS attributes, or Windows file
+ * handles that can be opened simultaneously to extract a single-instance
+ * stream to multiple destinations.  */
+#define MAX_OPEN_STREAMS 512
+
 static inline int
 extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
 {
index ae07755..f17d5f3 100644 (file)
@@ -295,6 +295,10 @@ read_stream_list(struct list_head *stream_list,
                 const struct read_stream_list_callbacks *cbs,
                 int flags);
 
+extern int
+read_full_stream_with_cbs(struct wim_lookup_table_entry *lte,
+                         const struct read_stream_list_callbacks *cbs);
+
 /* Functions to extract streams.  */
 
 extern int
index d5a5c66..8248d8d 100644 (file)
@@ -52,6 +52,7 @@ typedef wchar_t tchar;
 #  define tstrerror    _wcserror
 #  define taccess      _waccess
 #  define tstrdup      wcsdup
+#  define ttempnam     _wtempnam
 #  define tgetenv      _wgetenv
 #  define totlower(c)  towlower((wchar_t)(c))
 /* The following "tchar" functions do not have exact wide-character equivalents
@@ -109,6 +110,7 @@ typedef char tchar;
 #  define tstrtoul     strtoul
 #  define tmkdir       mkdir
 #  define tstrdup      strdup
+#  define ttempnam     tempnam
 #  define tgetenv      getenv
 #  define totlower(c)  tolower((unsigned char)(c))
 #  define TSTRDUP      STRDUP
index 65e60de..383aaf7 100644 (file)
@@ -266,20 +266,62 @@ out:
        return ret;
 }
 
+/* Creates a temporary file opened for writing.  The open file descriptor is
+ * returned in @fd_ret and its name is returned in @name_ret (dynamically
+ * allocated).  */
 static int
-begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte,
-                                  u32 flags, void *_ctx)
+create_temporary_file(struct filedes *fd_ret, tchar **name_ret)
+{
+       tchar *name;
+       int open_flags;
+       int raw_fd;
+
+retry:
+       name = ttempnam(NULL, T("wimlib"));
+       if (!name) {
+               ERROR_WITH_ERRNO("Failed to create temporary filename");
+               return WIMLIB_ERR_NOMEM;
+       }
+
+       open_flags = O_WRONLY | O_CREAT | O_EXCL | O_BINARY;
+#ifdef __WIN32__
+       open_flags |= _O_SHORT_LIVED;
+#endif
+       raw_fd = topen(name, open_flags, 0600);
+
+       if (raw_fd < 0) {
+               if (errno == EEXIST) {
+                       FREE(name);
+                       goto retry;
+               }
+               ERROR_WITH_ERRNO("Failed to create temporary file "
+                                "\"%"TS"\"", name);
+               FREE(name);
+               return WIMLIB_ERR_OPEN;
+       }
+
+       filedes_init(fd_ret, raw_fd);
+       *name_ret = name;
+       return 0;
+}
+
+static int
+begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte,
+                            u32 flags, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
 
        ctx->cur_stream = lte;
 
-       return (*ctx->saved_cbs->begin_stream)(lte, flags,
-                                              ctx->saved_cbs->begin_stream_ctx);
+       if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS))
+               return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name);
+       else
+               return (*ctx->saved_cbs->begin_stream)(lte, flags,
+                                                      ctx->saved_cbs->begin_stream_ctx);
 }
 
 static int
-consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
+extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
        union wimlib_progress_info *progress = &ctx->progress;
@@ -318,8 +360,71 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
                                ctx->next_progress = progress->extract.total_bytes;
                }
        }
-       return (*ctx->saved_cbs->consume_chunk)(chunk, size,
-                                               ctx->saved_cbs->consume_chunk_ctx);
+
+       if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
+               /* Just extracting to temporary file for now.  */
+               ret = full_write(&ctx->tmpfile_fd, chunk, size);
+               if (ret) {
+                       ERROR_WITH_ERRNO("Error writing data to "
+                                        "temporary file \"%"TS"\"",
+                                        ctx->tmpfile_name);
+               }
+               return ret;
+       } else {
+               return (*ctx->saved_cbs->consume_chunk)(chunk, size,
+                                                       ctx->saved_cbs->consume_chunk_ctx);
+       }
+}
+
+static int
+extract_from_tmpfile(const tchar *tmpfile_name,
+                    struct wim_lookup_table_entry *orig_lte,
+                    struct apply_ctx *ctx)
+{
+       struct wim_lookup_table_entry tmpfile_lte;
+       const struct stream_owner *owners = stream_owners(orig_lte);
+       int ret;
+
+       /* Copy the stream's data from the temporary file to each of its
+        * destinations.
+        *
+        * This is executed only in the very uncommon case that a
+        * single-instance stream is being extracted to more than
+        * MAX_OPEN_STREAMS locations!  */
+
+       memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry));
+       tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK;
+       tmpfile_lte.file_on_disk = ctx->tmpfile_name;
+       tmpfile_lte.out_refcnt = 1;
+
+       for (u32 i = 0; i < orig_lte->out_refcnt; i++) {
+               tmpfile_lte.inline_stream_owners[0] = owners[i];
+               ret = read_full_stream_with_cbs(&tmpfile_lte, ctx->saved_cbs);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int
+end_extract_stream_wrapper(struct wim_lookup_table_entry *stream,
+                          int status, void *_ctx)
+{
+       struct apply_ctx *ctx = _ctx;
+
+       if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
+               filedes_close(&ctx->tmpfile_fd);
+               if (!status)
+                       status = extract_from_tmpfile(ctx->tmpfile_name,
+                                                     stream, ctx);
+               filedes_invalidate(&ctx->tmpfile_fd);
+               tunlink(ctx->tmpfile_name);
+               FREE(ctx->tmpfile_name);
+               return status;
+       } else {
+               return (*ctx->saved_cbs->end_stream)(stream, status,
+                                                    ctx->saved_cbs->end_stream_ctx);
+       }
 }
 
 /*
@@ -332,30 +437,34 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
  *
  * This also works if the WIM is being read from a pipe, whereas attempting to
  * read streams directly (e.g. with read_full_stream_into_buf()) will not.
+ *
+ * This also will split up streams that will need to be extracted to more than
+ * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream.
+ * Therefore, the apply_operations implementation need not worry about running
+ * out of file descriptors, unless it might open more than one file descriptor
+ * per nominal destination (e.g. Win32 currently might because the destination
+ * file system might not support hard links).
  */
 int
 extract_stream_list(struct apply_ctx *ctx,
                    const struct read_stream_list_callbacks *cbs)
 {
        struct read_stream_list_callbacks wrapper_cbs = {
-               .begin_stream      = begin_extract_stream_with_progress,
+               .begin_stream      = begin_extract_stream_wrapper,
                .begin_stream_ctx  = ctx,
-               .consume_chunk     = consume_chunk_with_progress,
+               .consume_chunk     = extract_chunk_wrapper,
                .consume_chunk_ctx = ctx,
-               .end_stream        = cbs->end_stream,
-               .end_stream_ctx    = cbs->end_stream_ctx,
+               .end_stream        = end_extract_stream_wrapper,
+               .end_stream_ctx    = ctx,
        };
-       if (ctx->progfunc) {
-               ctx->saved_cbs = cbs;
-               cbs = &wrapper_cbs;
-       }
+       ctx->saved_cbs = cbs;
        if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
-               return load_streams_from_pipe(ctx, cbs);
+               return load_streams_from_pipe(ctx, &wrapper_cbs);
        } else {
                return read_stream_list(&ctx->stream_list,
                                        offsetof(struct wim_lookup_table_entry,
                                                 extraction_list),
-                                       cbs, VERIFY_STREAM_HASHES);
+                                       &wrapper_cbs, VERIFY_STREAM_HASHES);
        }
 }
 
@@ -1229,6 +1338,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
                ctx->progress.extract.target = target;
        }
        INIT_LIST_HEAD(&ctx->stream_list);
+       filedes_invalidate(&ctx->tmpfile_fd);
 
        ret = (*ops->get_supported_features)(target, &ctx->supported_features);
        if (ret)
index 59edc36..d075d3d 100644 (file)
@@ -70,8 +70,6 @@ ntfs_3g_get_supported_features(const char *target,
        return 0;
 }
 
-#define MAX_OPEN_ATTRS 1024
-
 struct ntfs_3g_apply_ctx {
        /* Extract flags, the pointer to the WIMStruct, etc.  */
        struct apply_ctx common;
@@ -79,9 +77,9 @@ struct ntfs_3g_apply_ctx {
        /* Pointer to the open NTFS volume  */
        ntfs_volume *vol;
 
-       ntfs_attr *open_attrs[MAX_OPEN_ATTRS];
+       ntfs_attr *open_attrs[MAX_OPEN_STREAMS];
        unsigned num_open_attrs;
-       ntfs_inode *open_inodes[MAX_OPEN_ATTRS];
+       ntfs_inode *open_inodes[MAX_OPEN_STREAMS];
        unsigned num_open_inodes;
 
        struct reparse_buffer_disk rpbuf;
@@ -91,8 +89,8 @@ struct ntfs_3g_apply_ctx {
        u64 offset;
 
        unsigned num_reparse_inodes;
-       ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_ATTRS];
-       struct wim_inode *wim_reparse_inodes[MAX_OPEN_ATTRS];
+       ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_STREAMS];
+       struct wim_inode *wim_reparse_inodes[MAX_OPEN_STREAMS];
 };
 
 static size_t
@@ -725,10 +723,8 @@ ntfs_3g_begin_extract_stream_to_attr(struct wim_lookup_table_entry *stream,
                return WIMLIB_ERR_NTFS_3G;
        }
 
-       if (ctx->num_open_attrs == MAX_OPEN_ATTRS) {
-               ERROR("Can't extract data: too many open files!");
-               return WIMLIB_ERR_UNSUPPORTED;
-       }
+       /* This should be ensured by extract_stream_list()  */
+       wimlib_assert(ctx->num_open_attrs < MAX_OPEN_STREAMS);
 
        attr = ntfs_attr_open(ni, AT_DATA, stream_name, stream_name_nchars);
        if (!attr) {
index 300aec8..470b63e 100644 (file)
@@ -1167,7 +1167,7 @@ out_next_cb:
                return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx);
 }
 
-static int
+int
 read_full_stream_with_cbs(struct wim_lookup_table_entry *lte,
                          const struct read_stream_list_callbacks *cbs)
 {
index 077b49d..b41c1e2 100644 (file)
@@ -26,6 +26,7 @@
 #endif
 
 #include "wimlib/apply.h"
+#include "wimlib/assert.h"
 #include "wimlib/dentry.h"
 #include "wimlib/error.h"
 #include "wimlib/file_io.h"
@@ -64,8 +65,6 @@ unix_get_supported_features(const char *target,
 }
 
 #define NUM_PATHBUFS 2  /* We need 2 when creating hard links  */
-#define MAX_OPEN_FDS 1000 /* TODO: Add special case for when the number of
-                            identical streams exceeds this number.  */
 
 struct unix_apply_ctx {
        /* Extract flags, the pointer to the WIMStruct, etc.  */
@@ -78,7 +77,7 @@ struct unix_apply_ctx {
        unsigned which_pathbuf;
 
        /* Currently open file descriptors for extraction  */
-       struct filedes open_fds[MAX_OPEN_FDS];
+       struct filedes open_fds[MAX_OPEN_STREAMS];
 
        /* Number of currently open file descriptors in open_fds, starting from
         * the beginning of the array.  */
@@ -543,10 +542,8 @@ unix_begin_extract_stream_instance(const struct wim_lookup_table_entry *stream,
                return 0;
        }
 
-       if (ctx->num_open_fds == MAX_OPEN_FDS) {
-               ERROR("Can't extract data: too many open files!");
-               return WIMLIB_ERR_UNSUPPORTED;
-       }
+       /* This should be ensured by extract_stream_list()  */
+       wimlib_assert(ctx->num_open_fds < MAX_OPEN_STREAMS);
 
        first_dentry = inode_first_extraction_dentry(inode);
        first_path = unix_build_extraction_path(first_dentry, ctx);
index 842d588..6879445 100644 (file)
 #include "wimlib/xml.h"
 #include "wimlib/wimboot.h"
 
-/* TODO: Add workaround for when a stream needs to be extracted to more places
- * than this  */
-#define MAX_OPEN_HANDLES 32768
-
 struct win32_apply_ctx {
 
        /* Extract flags, the pointer to the WIMStruct, etc.  */
@@ -105,7 +101,7 @@ struct win32_apply_ctx {
 
        /* Array of open handles to filesystem streams currently being written
         */
-       HANDLE open_handles[MAX_OPEN_HANDLES];
+       HANDLE open_handles[MAX_OPEN_STREAMS];
 
        /* Number of handles in @open_handles currently open (filled in from the
         * beginning of the array)  */
@@ -1359,8 +1355,10 @@ begin_extract_stream_instance(const struct wim_lookup_table_entry *stream,
                }
        }
 
-       /* Too many open handles?  */
-       if (ctx->num_open_handles == MAX_OPEN_HANDLES) {
+       if (ctx->num_open_handles == MAX_OPEN_STREAMS) {
+               /* XXX: Fix this.  But because of the checks in
+                * extract_stream_list(), this can now only happen on a
+                * filesystem that does not support hard links.  */
                ERROR("Can't extract data: too many open files!");
                return WIMLIB_ERR_UNSUPPORTED;
        }