-/*
- * unix_build_dentry_tree():
- *	Capture an on-disk directory tree as a tree of WIM dentries (UNIX
- *	version; no NTFS-specific data is captured).
- *
- * @root_ret:
- *	Place to return a pointer to the root of the dentry tree.  Only
- *	modified if successful.  Set to NULL if the file or directory was
- *	excluded from capture.
- *
- * @root_disk_path:
- *	The path to the root of the directory tree on disk.  Its length must
- *	be smaller than the scratch buffer allocated below, otherwise
- *	WIMLIB_ERR_INVALID_PARAM is returned.
- *
- * @lookup_table:
- *	The lookup table for the WIM file.  For each file added to the dentry
- *	tree being built, an entry is added to the lookup table, unless an
- *	identical stream is already in the lookup table.  These lookup table
- *	entries that are added point to the path of the file on disk.
- *
- * @sd_set:
- *	Ignored.  (Security data only captured in NTFS mode.)
- *
- * @config:
- *	Configuration for files to be excluded from capture.
- *
- * @add_image_flags:
- *	Bitwise or of WIMLIB_ADD_IMAGE_FLAG_*
- *
- * @progress_func:
- *	Handed unchanged to unix_build_dentry_tree_recursive().
- *
- * @extra_arg:
- *	Ignored
- *
- * @return:
- *	0 on success, nonzero on failure.  It is a failure if any of the files
- *	cannot be `stat'ed, or if any of the needed directories cannot be
- *	opened or read.  Failure to add the files to the WIM may still occur
- *	later when trying to actually read the on-disk files during a call to
- *	wimlib_write() or wimlib_overwrite().
- */
-static int
-unix_build_dentry_tree(struct wim_dentry **root_ret,
-		       const char *root_disk_path,
-		       struct wim_lookup_table *lookup_table,
-		       struct sd_set *sd_set,
-		       const struct capture_config *config,
-		       int add_image_flags,
-		       wimlib_progress_func_t progress_func,
-		       void *extra_arg)
-{
-	const size_t bufsz = min(32790, PATH_MAX + 1);
-	const size_t root_len = strlen(root_disk_path);
-	char *buf;
-	int status;
-
-	/* The root path and its terminator must fit in the scratch buffer. */
-	if (root_len >= bufsz)
-		return WIMLIB_ERR_INVALID_PARAM;
-
-	buf = MALLOC(bufsz);
-	if (buf == NULL)
-		return WIMLIB_ERR_NOMEM;
-
-	/* Seed the scratch buffer with the root path, terminator included;
-	 * the recursive worker receives the buffer and the current length. */
-	memcpy(buf, root_disk_path, root_len + 1);
-	status = unix_build_dentry_tree_recursive(root_ret,
-						  buf,
-						  root_len,
-						  lookup_table,
-						  config,
-						  add_image_flags,
-						  progress_func);
-	FREE(buf);
-	return status;
-}
-#endif /* !__WIN32__ */
-
-enum pattern_type { /* Which section of the capture config init_capture_config() is currently reading. */
- NONE = 0, /* No section header seen yet; a pattern line in this state is an error. */
- EXCLUSION_LIST, /* Selected by the "[ExclusionList]" header. */
- EXCLUSION_EXCEPTION, /* Selected by the "[ExclusionException]" header. */
- COMPRESSION_EXCLUSION_LIST, /* Selected by the "[CompressionExclusionList]" header. */
- ALIGNMENT_LIST, /* Selected by the "[AlignmentList]" header. */
-};
-
-#define COMPAT_DEFAULT_CONFIG
-
-/* Default capture configuration text, used when none is specified.  While
- * COMPAT_DEFAULT_CONFIG is defined (as it is just above) this is a single
- * [ExclusionList] section naming six paths, one per '\n'-terminated line;
- * otherwise it is the empty string. */
-static const tchar *default_config =
-#ifdef COMPAT_DEFAULT_CONFIG /* XXX: This policy is being moved to library
- users. The next ABI-incompatible library
- version will default to the empty string here. */
-T(
-"[ExclusionList]\n"
-"\\$ntfs.log\n"
-"\\hiberfil.sys\n"
-"\\pagefile.sys\n"
-"\\System Volume Information\n"
-"\\RECYCLER\n"
-"\\Windows\\CSC\n"
-);
-#else
-T("");
-#endif
-
-/* Release the array of pattern pointers held by @list.  Only the array is
- * freed here; the strings the entries point to are left alone. */
-static void
-destroy_pattern_list(struct pattern_list *list)
-{
-	FREE(list->pats);
-}
-
-/* Free everything a `struct capture_config' holds (its four pattern lists and
- * its copy of the configuration text), then zero the structure. */
-static void
-destroy_capture_config(struct capture_config *config)
-{
-	struct pattern_list *const lists[] = {
-		&config->exclusion_list,
-		&config->exclusion_exception,
-		&config->compression_exclusion_list,
-		&config->alignment_list,
-	};
-
-	for (size_t i = 0; i < sizeof(lists) / sizeof(lists[0]); i++)
-		destroy_pattern_list(lists[i]);
-
-	FREE(config->config_str);
-	memset(config, 0, sizeof(*config));
-}
-
-/* Append @pattern to @list, growing the pointer array by 8 slots whenever it
- * is full.  The pointer is stored as-is; the string is not copied.
- *
- * Returns 0 on success, or WIMLIB_ERR_NOMEM if the array could not be grown
- * (in which case @list is left unchanged). */
-static int
-pattern_list_add_pattern(struct pattern_list *list, const tchar *pattern)
-{
-	if (list->num_pats >= list->num_allocated_pats) {
-		const tchar **grown;
-
-		grown = REALLOC(list->pats,
-				(list->num_allocated_pats + 8) *
-					sizeof(list->pats[0]));
-		if (grown == NULL)
-			return WIMLIB_ERR_NOMEM;
-		list->pats = grown;
-		list->num_allocated_pats += 8;
-	}
-
-	list->pats[list->num_pats] = pattern;
-	list->num_pats++;
-	return 0;
-}
-
-/*
- * Parses the contents of the image capture configuration file and fills in a
- * `struct capture_config'.
- *
- * @config:
- *	Structure to fill in.  It is zeroed first.  For non-empty input it
- *	receives a private, modifiable copy of the text in config->config_str,
- *	and every pattern added to its lists points into that copy.
- *
- * @_config_str:
- *	The configuration text.  Only @config_num_tchars characters are read,
- *	so it need not be null-terminated.  It is not modified.
- *
- * @config_num_tchars:
- *	Number of characters in @_config_str.  Zero is accepted and yields an
- *	empty configuration.
- *
- * Format: every line must end in '\n', optionally preceded by '\r'.  Blank
- * lines are skipped.  Backslashes are translated to forward slashes.  A line
- * is either one of the section headers [ExclusionList], [ExclusionException],
- * [CompressionExclusionList] or [AlignmentList], or a pattern that is added
- * to the list of the most recent section header.
- *
- * Returns 0 on success, WIMLIB_ERR_NOMEM if memory could not be allocated, or
- * WIMLIB_ERR_INVALID_CAPTURE_CONFIG if the text is malformed.  After a parse
- * or list-growth failure @config has been destroyed and zeroed.
- */
-static int
-init_capture_config(struct capture_config *config,
-		    const tchar *_config_str,
-		    size_t config_num_tchars)
-{
-	tchar *config_str;
-	tchar *p;
-	tchar *eol;
-	tchar *next_p;
-	size_t num_tchars_remaining;
-	enum pattern_type type = NONE;
-	int ret;
-	unsigned long line_no = 0;
-
-	DEBUG("config_num_tchars = %zu", config_num_tchars);
-	num_tchars_remaining = config_num_tchars;
-	memset(config, 0, sizeof(*config));
-
-	/* An empty configuration is valid and needs no buffer.  Returning
-	 * here avoids a zero-sized allocation, which is allowed to yield NULL
-	 * and would then be misreported as running out of memory. */
-	if (config_num_tchars == 0)
-		return 0;
-
-	config_str = TMALLOC(config_num_tchars);
-	if (!config_str) {
-		ERROR("Could not duplicate capture config string");
-		return WIMLIB_ERR_NOMEM;
-	}
-
-	tmemcpy(config_str, _config_str, config_num_tchars);
-	next_p = config_str;
-	config->config_str = config_str;
-	while (num_tchars_remaining != 0) {
-		line_no++;
-		p = next_p;
-		eol = tmemchr(p, T('\n'), num_tchars_remaining);
-		if (!eol) {
-			ERROR("Expected end-of-line in capture config file on "
-			      "line %lu", line_no);
-			ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
-			goto out_destroy;
-		}
-
-		next_p = eol + 1;
-		num_tchars_remaining -= (next_p - p);
-
-		/* Drop the '\r' of a CRLF line ending before testing for a
-		 * blank line, so that "\r\n" counts as blank too instead of
-		 * being treated as an empty pattern. */
-		if (eol != p && *(eol - 1) == T('\r'))
-			eol--;
-		if (eol == p)
-			continue;
-
-		/* Terminate the line in place; patterns are stored as pointers
-		 * into this buffer. */
-		*eol = T('\0');
-
-		/* Translate backslash to forward slash */
-		for (tchar *pp = p; pp != eol; pp++)
-			if (*pp == T('\\'))
-				*pp = T('/');
-
-		/* Check if the path begins with a drive letter */
-		if (eol - p > 2 && *p != T('/') && *(p + 1) == T(':')) {
-			/* Don't allow relative paths on other drives.  The
-			 * line is known to be at least 3 characters long, so
-			 * reading p[2] is safe. */
-			if (*(p + 2) != T('/')) {
-				/* "%.1" prints just the drive letter from the
-				 * string at p; passing the character *p to a
-				 * string conversion would be undefined. */
-				ERROR("Relative paths including a drive letter "
-				      "are not allowed!\n"
-				      " Perhaps you meant "
-				      "\"%.1"TS":/%"TS"\"?\n",
-				      p, p + 2);
-				ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
-				goto out_destroy;
-			}
-		#ifndef __WIN32__
-			/* UNIX: strip the drive letter */
-			p += 2;
-		#endif
-		}
-
-		ret = 0;
-		if (!tstrcmp(p, T("[ExclusionList]")))
-			type = EXCLUSION_LIST;
-		else if (!tstrcmp(p, T("[ExclusionException]")))
-			type = EXCLUSION_EXCEPTION;
-		else if (!tstrcmp(p, T("[CompressionExclusionList]")))
-			type = COMPRESSION_EXCLUSION_LIST;
-		else if (!tstrcmp(p, T("[AlignmentList]")))
-			type = ALIGNMENT_LIST;
-		else if (p[0] == T('[') && tstrrchr(p, T(']'))) {
-			ERROR("Unknown capture configuration section \"%"TS"\"", p);
-			ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
-		} else switch (type) {
-		case EXCLUSION_LIST:
-			DEBUG("Adding pattern \"%"TS"\" to exclusion list", p);
-			ret = pattern_list_add_pattern(&config->exclusion_list, p);
-			break;
-		case EXCLUSION_EXCEPTION:
-			DEBUG("Adding pattern \"%"TS"\" to exclusion exception list", p);
-			ret = pattern_list_add_pattern(&config->exclusion_exception, p);
-			break;
-		case COMPRESSION_EXCLUSION_LIST:
-			DEBUG("Adding pattern \"%"TS"\" to compression exclusion list", p);
-			ret = pattern_list_add_pattern(&config->compression_exclusion_list, p);
-			break;
-		case ALIGNMENT_LIST:
-			DEBUG("Adding pattern \"%"TS"\" to alignment list", p);
-			ret = pattern_list_add_pattern(&config->alignment_list, p);
-			break;
-		default:
-			/* A pattern appeared before any section header. */
-			ERROR("Line %lu of capture configuration is not "
-			      "in a block (such as [ExclusionList])",
-			      line_no);
-			ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
-			break;
-		}
-		if (ret != 0)
-			goto out_destroy;
-	}
-	return 0;
-out_destroy:
-	destroy_capture_config(config);
-	return ret;
-}
-
-/* Return true if @path is absolute: it begins with '/', or (Windows builds
- * only) its second character is ':' as in a drive-letter prefix. */
-static bool
-is_absolute_path(const tchar *path)
-{
-#ifdef __WIN32__
-	/* Drive letter */
-	if (path[0] != T('\0') && path[1] == T(':'))
-		return true;
-#endif
-	return path[0] == T('/');
-}
-
-/* Return true if any pattern in @list matches.
- *
- * What each pattern is compared against depends on its form:
- *   - absolute pattern:             the whole of @path
- *   - relative pattern with a '/':  @path with its first character skipped
- *   - pattern without any '/':      @path_basename
- *
- * NOTE(review): skipping the first character presumes @path starts with a
- * '/' and is non-empty -- confirm against callers. */
-static bool
-match_pattern(const tchar *path,
-	      const tchar *path_basename,
-	      const struct pattern_list *list)
-{
-	int fnm_flags = FNM_PATHNAME;
-#ifdef FNM_CASEFOLD
-	fnm_flags |= FNM_CASEFOLD;
-#endif
-
-	for (size_t idx = 0; idx < list->num_pats; idx++) {
-		const tchar *pat = list->pats[idx];
-		const tchar *subject;
-
-		if (is_absolute_path(pat))
-			/* Absolute path from root of capture */
-			subject = path;
-		else if (tstrchr(pat, T('/')))
-			/* Relative path from root of capture */
-			subject = path + 1;
-		else
-			/* A file name pattern */
-			subject = path_basename;
-
-		/* Warning: on Windows native builds, fnmatch() calls the
-		 * replacement function in win32.c. */
-		if (fnmatch(pat, subject, fnm_flags) != 0)
-			continue;
-
-		DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
-		      subject, pat);
-		return true;
-	}
-	return false;
-}
-
-/* Return true if the image capture configuration indicates that the file
- * named by @path should be excluded from capture, i.e. it matches the
- * exclusion list and does not match the exclusion exception list.
- *
- * If @exclude_prefix is %true, the part of the path up to and including the
- * name of the directory being captured is not included in the path for
- * matching purposes.  This allows, for example, a pattern like /hiberfil.sys
- * to match a file /mnt/windows7/hiberfil.sys if we are capturing the
- * /mnt/windows7 directory.
- */
-bool
-exclude_path(const tchar *path, size_t path_len,
-	     const struct capture_config *config, bool exclude_prefix)
-{
-	/* Taken from the full path, before any prefix is stripped. */
-	const tchar *name = path_basename_with_len(path, path_len);
-
-	if (exclude_prefix) {
-		wimlib_assert(path_len >= config->prefix_num_tchars);
-		/* Strip the prefix only when it is followed by a '/'. */
-		if (tmemcmp(config->prefix, path,
-			    config->prefix_num_tchars) == 0 &&
-		    path[config->prefix_num_tchars] == T('/'))
-			path += config->prefix_num_tchars;
-	}
-
-	if (!match_pattern(path, name, &config->exclusion_list))
-		return false;
-	return !match_pattern(path, name, &config->exclusion_exception);
-}
-
-/* Strip leading and trailing forward slashes from @target_path.  Leading
- * slashes are skipped by advancing the pointer; trailing ones are overwritten
- * with null terminators in place.  Returns the stripped string, or a constant
- * empty string when @target_path is NULL. */
-static const tchar *
-canonicalize_target_path(tchar *target_path)
-{
-	tchar *last;
-
-	if (target_path == NULL)
-		return T("");
-
-	while (*target_path == T('/'))
-		target_path++;
-
-	/* Nothing but slashes (or nothing at all): already canonical. */
-	if (*target_path == T('\0'))
-		return target_path;
-
-	/* The first character is now a non-slash, so walking backwards from
-	 * the end is guaranteed to stop inside the string. */
-	for (last = tstrchr(target_path, T('\0')) - 1; *last == T('/'); last--)
-		*last = T('\0');
-
-	return target_path;
-}
-
-/* For each of the @num_sources capture sources: translate backslashes in the
- * source and target paths into forward slashes, then strip leading and
- * trailing slashes from the target path.  A NULL target path is replaced by
- * an empty string. */
-static void
-canonicalize_sources_and_targets(struct wimlib_capture_source *sources,
-				 size_t num_sources)
-{
-	for (size_t i = 0; i < num_sources; i++) {
-		struct wimlib_capture_source *src = &sources[i];
-
-		DEBUG("Canonicalizing { source: \"%"TS"\", target=\"%"TS"\"}",
-		      src->fs_source_path,
-		      src->wim_target_path);
-
-		/* The Windows API can handle forward slashes.  Just get rid of
-		 * backslashes to avoid confusing other parts of the library
-		 * code. */
-		zap_backslashes(src->fs_source_path);
-		if (src->wim_target_path != NULL)
-			zap_backslashes(src->wim_target_path);
-
-		src->wim_target_path =
-			(tchar*)canonicalize_target_path(src->wim_target_path);
-		DEBUG("Canonical target: \"%"TS"\"", src->wim_target_path);
-	}
-}
-
-/* qsort() comparator: orders two `struct wimlib_capture_source' elements by
- * comparing their target paths with tstrcmp(). */
-static int
-capture_source_cmp(const void *p1, const void *p2)
-{
-	const struct wimlib_capture_source *lhs = p1;
-	const struct wimlib_capture_source *rhs = p2;
-
-	return tstrcmp(lhs->wim_target_path, rhs->wim_target_path);
-}
-
-/* Sorts the capture sources lexicographically by target path.  This is meant
- * to run after leading and trailing forward slashes have been stripped.
- *
- * One purpose of this is to make sure that target paths that are inside other
- * target paths are added after the containing target paths. */
-static void
-sort_sources(struct wimlib_capture_source *sources, size_t num_sources)
-{
-	qsort(sources, num_sources, sizeof(*sources), capture_source_cmp);
-}
-
-/* Validate the capture sources against the capture mode.
- *
- * In NTFS mode (WIMLIB_ADD_IMAGE_FLAG_NTFS set in @add_image_flags) there must
- * be exactly one source and its target path must be the empty string (the
- * image root).  In any other mode nothing is checked at present.
- *
- * Returns 0 if the sources are acceptable, else WIMLIB_ERR_INVALID_PARAM. */
-static int
-check_sorted_sources(struct wimlib_capture_source *sources, size_t num_sources,
-		     int add_image_flags)
-{
-	const bool ntfs_mode =
-		(add_image_flags & WIMLIB_ADD_IMAGE_FLAG_NTFS) != 0;
-
-	if (ntfs_mode && num_sources != 1) {
-		ERROR("Must specify exactly 1 capture source "
-		      "(the NTFS volume) in NTFS mode!");
-		return WIMLIB_ERR_INVALID_PARAM;
-	}
-	if (ntfs_mode && sources[0].wim_target_path[0] != T('\0')) {
-		ERROR("In NTFS capture mode the target path inside "
-		      "the image must be the root directory!");
-		return WIMLIB_ERR_INVALID_PARAM;
-	}
-
-	/* This code is disabled because the current code
-	 * unconditionally attempts to do overlays.  So, duplicate
-	 * target paths are OK. */
-#if 0
-	if (!ntfs_mode && num_sources != 0) {
-		if (num_sources > 1 && sources[0].wim_target_path[0] == '\0') {
-			ERROR("Cannot specify root target when using multiple "
-			      "capture sources!");
-			return WIMLIB_ERR_INVALID_PARAM;
-		}
-		for (size_t i = 0; i < num_sources - 1; i++) {
-			size_t len = strlen(sources[i].wim_target_path);
-			size_t j = i + 1;
-			const char *target1 = sources[i].wim_target_path;
-			do {
-				const char *target2 = sources[j].wim_target_path;
-				DEBUG("target1=%s, target2=%s",
-				      target1,target2);
-				if (strncmp(target1, target2, len) ||
-				    target2[len] > '/')
-					break;
-				if (target2[len] == '/') {
-					ERROR("Invalid target `%s': is a prefix of `%s'",
-					      target1, target2);
-					return WIMLIB_ERR_INVALID_PARAM;
-				}
-				if (target2[len] == '\0') {
-					ERROR("Invalid target `%s': is a duplicate of `%s'",
-					      target1, target2);
-					return WIMLIB_ERR_INVALID_PARAM;
-				}
-			} while (++j != num_sources);
-		}
-	}
-#endif
-	return 0;
-}
-
-/* Creates a new directory to place in the WIM image. This is to create parent
- * directories that are not part of any target as needed. */
-static int
-new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)