From: Eric Biggers Date: Fri, 20 Jun 2014 04:58:43 +0000 (-0500) Subject: extract.c: Fix for running out of file handles X-Git-Tag: v1.7.1~92 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=c1d980511c9b9e53dec66710904ad8f98bc22809 extract.c: Fix for running out of file handles --- diff --git a/NEWS b/NEWS index 30558f43..e29d8bae 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Version 1.7.1-BETA: + The new (as of v1.7.0) extraction code will no longer run out of file + handles when extracting many (1000+) identical files. + + Library users can now initialize and de-initialize the library multiple + times in one run of an application program. + Version 1.7.0: Improved compression, decompression, and extraction performance. diff --git a/include/wimlib/apply.h b/include/wimlib/apply.h index 6c595d51..9f5f86bd 100644 --- a/include/wimlib/apply.h +++ b/include/wimlib/apply.h @@ -1,6 +1,7 @@ #ifndef _WIMLIB_APPLY_H #define _WIMLIB_APPLY_H +#include "wimlib/file_io.h" #include "wimlib/list.h" #include "wimlib/progress.h" #include "wimlib/types.h" @@ -66,8 +67,15 @@ struct apply_ctx { struct list_head stream_list; const struct read_stream_list_callbacks *saved_cbs; struct wim_lookup_table_entry *cur_stream; + struct filedes tmpfile_fd; + tchar *tmpfile_name; }; +/* Maximum number of UNIX file descriptors, NTFS attributes, or Windows file + * handles that can be opened simultaneously to extract a single-instance + * stream to multiple destinations. */ +#define MAX_OPEN_STREAMS 512 + static inline int extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg) { diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h index ae077553..f17d5f3a 100644 --- a/include/wimlib/resource.h +++ b/include/wimlib/resource.h @@ -295,6 +295,10 @@ read_stream_list(struct list_head *stream_list, const struct read_stream_list_callbacks *cbs, int flags); +extern int +read_full_stream_with_cbs(struct wim_lookup_table_entry *lte, + const struct read_stream_list_callbacks *cbs); + /* Functions to extract streams. */ extern int diff --git a/include/wimlib_tchar.h b/include/wimlib_tchar.h index d5a5c66e..8248d8d9 100644 --- a/include/wimlib_tchar.h +++ b/include/wimlib_tchar.h @@ -52,6 +52,7 @@ typedef wchar_t tchar; # define tstrerror _wcserror # define taccess _waccess # define tstrdup wcsdup +# define ttempnam _wtempnam # define tgetenv _wgetenv # define totlower(c) towlower((wchar_t)(c)) /* The following "tchar" functions do not have exact wide-character equivalents @@ -109,6 +110,7 @@ typedef char tchar; # define tstrtoul strtoul # define tmkdir mkdir # define tstrdup strdup +# define ttempnam tempnam # define tgetenv getenv # define totlower(c) tolower((unsigned char)(c)) # define TSTRDUP STRDUP diff --git a/src/extract.c b/src/extract.c index 65e60dea..383aaf71 100644 --- a/src/extract.c +++ b/src/extract.c @@ -266,20 +266,62 @@ out: return ret; } +/* Creates a temporary file opened for writing. The open file descriptor is + * returned in @fd_ret and its name is returned in @name_ret (dynamically + * allocated). */ static int -begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte, - u32 flags, void *_ctx) +create_temporary_file(struct filedes *fd_ret, tchar **name_ret) +{ + tchar *name; + int open_flags; + int raw_fd; + +retry: + name = ttempnam(NULL, T("wimlib")); + if (!name) { + ERROR_WITH_ERRNO("Failed to create temporary filename"); + return WIMLIB_ERR_NOMEM; + } + + open_flags = O_WRONLY | O_CREAT | O_EXCL | O_BINARY; +#ifdef __WIN32__ + open_flags |= _O_SHORT_LIVED; +#endif + raw_fd = topen(name, open_flags, 0600); + + if (raw_fd < 0) { + if (errno == EEXIST) { + FREE(name); + goto retry; + } + ERROR_WITH_ERRNO("Failed to create temporary file " + "\"%"TS"\"", name); + FREE(name); + return WIMLIB_ERR_OPEN; + } + + filedes_init(fd_ret, raw_fd); + *name_ret = name; + return 0; +} + +static int +begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte, + u32 flags, void *_ctx) { struct apply_ctx *ctx = _ctx; ctx->cur_stream = lte; - return (*ctx->saved_cbs->begin_stream)(lte, flags, - ctx->saved_cbs->begin_stream_ctx); + if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS)) + return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name); + else + return (*ctx->saved_cbs->begin_stream)(lte, flags, + ctx->saved_cbs->begin_stream_ctx); } static int -consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) +extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx) { struct apply_ctx *ctx = _ctx; union wimlib_progress_info *progress = &ctx->progress; @@ -318,8 +360,71 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) ctx->next_progress = progress->extract.total_bytes; } } - return (*ctx->saved_cbs->consume_chunk)(chunk, size, - ctx->saved_cbs->consume_chunk_ctx); + + if (unlikely(filedes_valid(&ctx->tmpfile_fd))) { + /* Just extracting to temporary file for now. */ + ret = full_write(&ctx->tmpfile_fd, chunk, size); + if (ret) { + ERROR_WITH_ERRNO("Error writing data to " + "temporary file \"%"TS"\"", + ctx->tmpfile_name); + } + return ret; + } else { + return (*ctx->saved_cbs->consume_chunk)(chunk, size, + ctx->saved_cbs->consume_chunk_ctx); + } +} + +static int +extract_from_tmpfile(const tchar *tmpfile_name, + struct wim_lookup_table_entry *orig_lte, + struct apply_ctx *ctx) +{ + struct wim_lookup_table_entry tmpfile_lte; + const struct stream_owner *owners = stream_owners(orig_lte); + int ret; + + /* Copy the stream's data from the temporary file to each of its + * destinations. + * + * This is executed only in the very uncommon case that a + * single-instance stream is being extracted to more than + * MAX_OPEN_STREAMS locations! */ + + memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry)); + tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK; + tmpfile_lte.file_on_disk = ctx->tmpfile_name; + tmpfile_lte.out_refcnt = 1; + + for (u32 i = 0; i < orig_lte->out_refcnt; i++) { + tmpfile_lte.inline_stream_owners[0] = owners[i]; + ret = read_full_stream_with_cbs(&tmpfile_lte, ctx->saved_cbs); + if (ret) + return ret; + } + return 0; +} + +static int +end_extract_stream_wrapper(struct wim_lookup_table_entry *stream, + int status, void *_ctx) +{ + struct apply_ctx *ctx = _ctx; + + if (unlikely(filedes_valid(&ctx->tmpfile_fd))) { + filedes_close(&ctx->tmpfile_fd); + if (!status) + status = extract_from_tmpfile(ctx->tmpfile_name, + stream, ctx); + filedes_invalidate(&ctx->tmpfile_fd); + tunlink(ctx->tmpfile_name); + FREE(ctx->tmpfile_name); + return status; + } else { + return (*ctx->saved_cbs->end_stream)(stream, status, + ctx->saved_cbs->end_stream_ctx); + } } /* @@ -332,30 +437,34 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) * * This also works if the WIM is being read from a pipe, whereas attempting to * read streams directly (e.g. with read_full_stream_into_buf()) will not. + * + * This also will split up streams that will need to be extracted to more than + * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream. + * Therefore, the apply_operations implementation need not worry about running + * out of file descriptors, unless it might open more than one file descriptor + * per nominal destination (e.g. Win32 currently might because the destination + * file system might not support hard links). */ int extract_stream_list(struct apply_ctx *ctx, const struct read_stream_list_callbacks *cbs) { struct read_stream_list_callbacks wrapper_cbs = { - .begin_stream = begin_extract_stream_with_progress, + .begin_stream = begin_extract_stream_wrapper, .begin_stream_ctx = ctx, - .consume_chunk = consume_chunk_with_progress, + .consume_chunk = extract_chunk_wrapper, .consume_chunk_ctx = ctx, - .end_stream = cbs->end_stream, - .end_stream_ctx = cbs->end_stream_ctx, + .end_stream = end_extract_stream_wrapper, + .end_stream_ctx = ctx, }; - if (ctx->progfunc) { - ctx->saved_cbs = cbs; - cbs = &wrapper_cbs; - } + ctx->saved_cbs = cbs; if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { - return load_streams_from_pipe(ctx, cbs); + return load_streams_from_pipe(ctx, &wrapper_cbs); } else { return read_stream_list(&ctx->stream_list, offsetof(struct wim_lookup_table_entry, extraction_list), - cbs, VERIFY_STREAM_HASHES); + &wrapper_cbs, VERIFY_STREAM_HASHES); } } @@ -1229,6 +1338,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, ctx->progress.extract.target = target; } INIT_LIST_HEAD(&ctx->stream_list); + filedes_invalidate(&ctx->tmpfile_fd); ret = (*ops->get_supported_features)(target, &ctx->supported_features); if (ret) diff --git a/src/ntfs-3g_apply.c b/src/ntfs-3g_apply.c index 59edc363..d075d3d1 100644 --- a/src/ntfs-3g_apply.c +++ b/src/ntfs-3g_apply.c @@ -70,8 +70,6 @@ ntfs_3g_get_supported_features(const char *target, return 0; } -#define MAX_OPEN_ATTRS 1024 - struct ntfs_3g_apply_ctx { /* Extract flags, the pointer to the WIMStruct, etc. */ struct apply_ctx common; @@ -79,9 +77,9 @@ struct ntfs_3g_apply_ctx { /* Pointer to the open NTFS volume */ ntfs_volume *vol; - ntfs_attr *open_attrs[MAX_OPEN_ATTRS]; + ntfs_attr *open_attrs[MAX_OPEN_STREAMS]; unsigned num_open_attrs; - ntfs_inode *open_inodes[MAX_OPEN_ATTRS]; + ntfs_inode *open_inodes[MAX_OPEN_STREAMS]; unsigned num_open_inodes; struct reparse_buffer_disk rpbuf; @@ -91,8 +89,8 @@ struct ntfs_3g_apply_ctx { u64 offset; unsigned num_reparse_inodes; - ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_ATTRS]; - struct wim_inode *wim_reparse_inodes[MAX_OPEN_ATTRS]; + ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_STREAMS]; + struct wim_inode *wim_reparse_inodes[MAX_OPEN_STREAMS]; }; static size_t @@ -725,10 +723,8 @@ ntfs_3g_begin_extract_stream_to_attr(struct wim_lookup_table_entry *stream, return WIMLIB_ERR_NTFS_3G; } - if (ctx->num_open_attrs == MAX_OPEN_ATTRS) { - ERROR("Can't extract data: too many open files!"); - return WIMLIB_ERR_UNSUPPORTED; - } + /* This should be ensured by extract_stream_list() */ + wimlib_assert(ctx->num_open_attrs < MAX_OPEN_STREAMS); attr = ntfs_attr_open(ni, AT_DATA, stream_name, stream_name_nchars); if (!attr) { diff --git a/src/resource.c b/src/resource.c index 300aec8f..470b63ec 100644 --- a/src/resource.c +++ b/src/resource.c @@ -1167,7 +1167,7 @@ out_next_cb: return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx); } -static int +int read_full_stream_with_cbs(struct wim_lookup_table_entry *lte, const struct read_stream_list_callbacks *cbs) { diff --git a/src/unix_apply.c b/src/unix_apply.c index 077b49db..b41c1e2c 100644 --- a/src/unix_apply.c +++ b/src/unix_apply.c @@ -26,6 +26,7 @@ #endif #include "wimlib/apply.h" +#include "wimlib/assert.h" #include "wimlib/dentry.h" #include "wimlib/error.h" #include "wimlib/file_io.h" @@ -64,8 +65,6 @@ unix_get_supported_features(const char *target, } #define NUM_PATHBUFS 2 /* We need 2 when creating hard links */ -#define MAX_OPEN_FDS 1000 /* TODO: Add special case for when the number of - identical streams exceeds this number. */ struct unix_apply_ctx { /* Extract flags, the pointer to the WIMStruct, etc. */ @@ -78,7 +77,7 @@ struct unix_apply_ctx { unsigned which_pathbuf; /* Currently open file descriptors for extraction */ - struct filedes open_fds[MAX_OPEN_FDS]; + struct filedes open_fds[MAX_OPEN_STREAMS]; /* Number of currently open file descriptors in open_fds, starting from * the beginning of the array. */ @@ -543,10 +542,8 @@ unix_begin_extract_stream_instance(const struct wim_lookup_table_entry *stream, return 0; } - if (ctx->num_open_fds == MAX_OPEN_FDS) { - ERROR("Can't extract data: too many open files!"); - return WIMLIB_ERR_UNSUPPORTED; - } + /* This should be ensured by extract_stream_list() */ + wimlib_assert(ctx->num_open_fds < MAX_OPEN_STREAMS); first_dentry = inode_first_extraction_dentry(inode); first_path = unix_build_extraction_path(first_dentry, ctx); diff --git a/src/win32_apply.c b/src/win32_apply.c index 842d588e..6879445c 100644 --- a/src/win32_apply.c +++ b/src/win32_apply.c @@ -40,10 +40,6 @@ #include "wimlib/xml.h" #include "wimlib/wimboot.h" -/* TODO: Add workaround for when a stream needs to be extracted to more places - * than this */ -#define MAX_OPEN_HANDLES 32768 - struct win32_apply_ctx { /* Extract flags, the pointer to the WIMStruct, etc. */ @@ -105,7 +101,7 @@ struct win32_apply_ctx { /* Array of open handles to filesystem streams currently being written */ - HANDLE open_handles[MAX_OPEN_HANDLES]; + HANDLE open_handles[MAX_OPEN_STREAMS]; /* Number of handles in @open_handles currently open (filled in from the * beginning of the array) */ @@ -1359,8 +1355,10 @@ begin_extract_stream_instance(const struct wim_lookup_table_entry *stream, } } - /* Too many open handles? */ - if (ctx->num_open_handles == MAX_OPEN_HANDLES) { + if (ctx->num_open_handles == MAX_OPEN_STREAMS) { + /* XXX: Fix this. But because of the checks in + * extract_stream_list(), this can now only happen on a + * filesystem that does not support hard links. */ ERROR("Can't extract data: too many open files!"); return WIMLIB_ERR_UNSUPPORTED; }