6 * Copyright (C) 2014 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option)
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/assert.h"
29 #include "wimlib/encoding.h"
30 #include "wimlib/error.h"
31 #include "wimlib/file_io.h"
32 #include "wimlib/textfile.h"
33 #include "wimlib/util.h"
/*
 * read_file_contents() - read the full contents of a file into a buffer
 * allocated with MALLOC().
 *
 * @path:        File to read; opened with O_RDONLY | O_BINARY.
 * @buf_ret:     Output pointer for the data buffer.
 *               NOTE(review): the store to *buf_ret is not visible in the
 *               reviewed text; presumably it receives 'buf' on success.
 * @bufsize_ret: Set to the file size (st.st_size) reported by fstat().
 *
 * Error codes visible here: WIMLIB_ERR_INVALID_PARAM, WIMLIB_ERR_OPEN,
 * WIMLIB_ERR_STAT and WIMLIB_ERR_NOMEM.  A failed read is reported with
 * ERROR_WITH_ERRNO().
 *
 * NOTE(review): several statements of this function (declarations, the
 * conditions guarding the early returns, descriptor/buffer cleanup and the
 * final return) are not visible in the reviewed text; confirm against the
 * full source before relying on this summary.
 */
43 read_file_contents(const tchar *path, u8 **buf_ret, size_t *bufsize_ret)
53 return WIMLIB_ERR_INVALID_PARAM;
/* Open the file read-only with the O_BINARY flag. */
55 raw_fd = topen(path, O_RDONLY | O_BINARY);
57 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
58 return WIMLIB_ERR_OPEN;
/* The file size from fstat() determines how much is allocated and read. */
60 if (fstat(raw_fd, &st)) {
61 ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
63 return WIMLIB_ERR_STAT;
/* Reject a size that does not survive conversion to size_t, then allocate
 * exactly st.st_size bytes. */
65 if ((size_t)st.st_size != st.st_size ||
66 (buf = MALLOC(st.st_size)) == NULL)
69 ERROR("Not enough memory to read \"%"TS"\"", path);
70 return WIMLIB_ERR_NOMEM;
/* Initialize 'fd' from the raw descriptor and read st.st_size bytes into
 * 'buf' with full_read(). */
73 filedes_init(&fd, raw_fd);
74 ret = full_read(&fd, buf, st.st_size);
79 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
85 *bufsize_ret = st.st_size;
/*
 * read_text_file_contents() - read a text file and return its contents as a
 * 'tchar' string.
 *
 * @path:       File to read (passed to read_file_contents()).
 * @buf_ret:    Output pointer for the converted text.
 *              NOTE(review): the store to *buf_ret is not visible in the
 *              reviewed text; presumably it receives 'buf_tstr'.
 * @buflen_ret: Set to the length of the converted text in tchars
 *              (bufsize_tstr / sizeof(tchar)).
 *
 * The raw bytes are inspected to guess the encoding.  The visible checks test
 * for a first byte of 0xFF (at least 2 bytes), a first byte <= 0x7F (at least
 * 2 bytes), and the bytes 0xEF 0xBB (at least 3 bytes).
 * NOTE(review): presumably these are the UTF-16LE BOM, BOM-less UTF-16LE text
 * and the UTF-8 BOM respectively; the remaining conditions and the values
 * assigned in each branch (including 'offset_raw') are not visible in the
 * reviewed text.
 *
 * Three conversion paths are visible: utf8_to_tstr(), a raw copy, and
 * utf16le_to_tstr().  NOTE(review): the conditions selecting between them are
 * not visible.
 */
90 read_text_file_contents(const tchar *path,
91 tchar **buf_ret, size_t *buflen_ret)
101 ret = read_file_contents(path, &buf_raw, &bufsize_raw);
105 /* Guess the encoding: UTF-8 or UTF-16LE. (Something weirder and you're
106 * out of luck, sorry...) */
107 if (bufsize_raw >= 2 &&
108 buf_raw[0] == 0xFF &&
114 else if (bufsize_raw >= 2 &&
115 buf_raw[0] <= 0x7F &&
121 else if (bufsize_raw >= 3 &&
122 buf_raw[0] == 0xEF &&
123 buf_raw[1] == 0xBB &&
/* Conversion via utf8_to_tstr(), skipping the first 'offset_raw' bytes. */
136 ret = utf8_to_tstr((const char *)(buf_raw + offset_raw),
137 bufsize_raw - offset_raw,
138 &buf_tstr, &bufsize_tstr);
/* Raw-copy path: duplicate the bytes after 'offset_raw' and append two zero
 * bytes.  NOTE(review): presumably two bytes are used so the terminator is
 * valid whether tchar is one or two bytes wide. */
141 bufsize_tstr = bufsize_raw - offset_raw;
142 buf_tstr = MALLOC(bufsize_tstr + 2);
144 memcpy(buf_tstr, buf_raw + offset_raw, bufsize_tstr);
145 ((u8*)buf_tstr)[bufsize_tstr + 0] = 0;
146 ((u8*)buf_tstr)[bufsize_tstr + 1] = 0;
148 ret = WIMLIB_ERR_NOMEM;
/* Conversion via utf16le_to_tstr(), skipping the first 'offset_raw' bytes. */
151 ret = utf16le_to_tstr((const utf16lechar *)(buf_raw + offset_raw),
152 bufsize_raw - offset_raw,
153 &buf_tstr, &bufsize_tstr);
/* bufsize_tstr is a byte count; report the length in tchars. */
161 *buflen_ret = bufsize_tstr / sizeof(tchar);
/*
 * string_set_append() - append one string pointer to a growable string set.
 *
 * @set: Set to append to; its 'strings' array is enlarged with REALLOC() when
 *       num_strings has reached num_alloc_strings.
 * @str: Pointer stored in the next free slot.  Only the pointer is stored;
 *       the string is not copied here.
 *
 * WIMLIB_ERR_NOMEM is returned on the visible failure path.
 * NOTE(review): the check of REALLOC()'s result and the success return are
 * not visible in the reviewed text.
 */
166 string_set_append(struct string_set *set, tchar *str)
168 size_t num_alloc_strings = set->num_alloc_strings;
/* Array is full: grow before appending. */
170 if (set->num_strings == num_alloc_strings) {
/* New capacity is the larger of 1.5x the old capacity and old + 4, so an
 * empty or very small set still grows by at least 4 slots. */
173 num_alloc_strings = max(num_alloc_strings * 3 / 2,
174 num_alloc_strings + 4);
/* The result goes into a temporary so that set->strings is only replaced
 * by the assignment below. */
175 new_strings = REALLOC(set->strings,
176 sizeof(set->strings[0]) * num_alloc_strings);
178 return WIMLIB_ERR_NOMEM;
179 set->strings = new_strings;
180 set->num_alloc_strings = num_alloc_strings;
/* Store the pointer and bump the element count. */
182 set->strings[set->num_strings++] = str;
/*
 * Sentinel values for 'current_section' in parse_text_file().  Both are
 * negative, so they cannot collide with a valid index into 'pos_sections'
 * and can be detected together with a 'current_section < 0' test.
 *
 * NOT_IN_SECTION:     initial state, before any bracketed header is seen.
 * IN_UNKNOWN_SECTION: assigned when a bracketed header is found, before it
 *                     is compared against the known section names.
 */
186 #define NOT_IN_SECTION -1
187 #define IN_UNKNOWN_SECTION -2
/*
 * parse_text_file() - split a text buffer into lines and file each line under
 * the section it belongs to.
 *
 * @path:             Name used in warning messages and passed to the
 *                    mangle_line callback.
 * @buf, @buflen:     Text to parse, and its length in tchars.  The buffer is
 *                    modified in place (lines are NUL-terminated inside it).
 *                    It is expected to end with a newline; do_load_text_file()
 *                    asserts this before calling.
 * @pos_sections:     Array of recognized sections; each has a 'name' and a
 *                    'strings' set that receives the section's lines.
 * @num_pos_sections: Number of entries in @pos_sections.
 * @flags:            LOAD_TEXT_FILE_REMOVE_QUOTES is tested here.
 * @mangle_line:      Callback invoked as (*mangle_line)(line, path, line_no).
 *
 * Per line: leading and trailing whitespace is skipped, empty lines and lines
 * starting with ';' or '#' are ignored, and a line of the form "[name]" is
 * compared against the known section names.
 *
 * NOTE(review): many statements of this function (declarations, loop bodies,
 * 'continue'/'return' statements and several conditions) are not visible in
 * the reviewed text; confirm details against the full source.
 */
190 parse_text_file(const tchar *path, tchar *buf, size_t buflen,
191 const struct text_file_section *pos_sections,
192 int num_pos_sections, int flags, line_mangle_t mangle_line)
194 int current_section = NOT_IN_SECTION;
195 bool have_named_sections = false;
198 unsigned long line_no = 1;
/* Note whether any recognized section has a non-empty name. */
200 for (int i = 0; i < num_pos_sections; i++) {
201 if (*pos_sections[i].name)
202 have_named_sections = true;
/* Walk the buffer one line at a time: each iteration resumes just past the
 * previous newline and increments the line number used in messages. */
207 for (p = buf; p != buf + buflen; p = nl + 1, line_no++) {
208 tchar *line_begin, *line_end;
/* Locate the newline ending this line, searching only the unread part of
 * the buffer. */
212 nl = tmemchr(p, T('\n'), buf + buflen - p);
219 /* Ignore leading whitespace. */
220 while (line_begin < nl && istspace(*line_begin))
223 /* Ignore trailing whitespace. */
224 while (line_end > line_begin && istspace(*(line_end - 1)))
227 line_len = line_end - line_begin;
229 /* Ignore comments and empty lines. */
230 if (line_len == 0 || *line_begin == T(';') || *line_begin == T('#'))
/* NUL-terminate the trimmed line in place. */
233 line_begin[line_len] = T('\0');
235 /* Check for beginning of new section. */
236 if (line_begin[0] == T('[') &&
237 line_begin[line_len - 1] == T(']') &&
/* Temporarily replace the closing ']' with NUL so that line_begin + 1 is
 * the bare section name for the comparison below. */
240 line_begin[line_len - 1] = T('\0');
241 current_section = IN_UNKNOWN_SECTION;
242 for (int i = 0; i < num_pos_sections; i++) {
243 if (!tstrcmp(line_begin + 1,
244 pos_sections[i].name))
/* Put the ']' back so the warning prints the header as written. */
250 line_begin[line_len - 1] = T(']');
251 if (current_section < 0)
252 WARNING("%"TS":%lu: Unrecognized section \"%"TS"\"",
253 path, line_no, line_begin);
/* Line is not inside a recognized section; a warning is issued when no
 * section header has been seen yet.  NOTE(review): the rest of this branch
 * is not visible in the reviewed text. */
257 if (current_section < 0) {
258 if (current_section == NOT_IN_SECTION)
259 WARNING("%"TS":%lu: Not in a bracketed section!",
/* Optional handling of a line wrapped in a matching pair of single or
 * double quotes.  NOTE(review): the statements that actually drop the
 * quotes are not visible in the reviewed text. */
264 if (flags & LOAD_TEXT_FILE_REMOVE_QUOTES) {
265 if (line_begin[0] == T('"') || line_begin[0] == T('\'')) {
266 tchar quote = line_begin[0];
268 line_begin[line_len - 1] == quote)
272 line_begin[line_len] = T('\0');
/* Hand the line to the caller-supplied callback; its result is kept in
 * 'ret'. */
278 ret = (*mangle_line)(line_begin, path, line_no);
/* Record the line in the string set of the current section. */
283 ret = string_set_append(pos_sections[current_section].strings,
292 * do_load_text_file -
294 * Read and parse lines from a text file from an on-disk file or a buffer.
295 * The file may contain sections, like in an INI file.
298 * Path to the file on disk to read, or a dummy name for the buffer.
300 * If NULL, the data will be read from the @path file. Otherwise the data
301 * will be read from this buffer, which must be newline-terminated.
303 * Length of buffer in 'tchars'; ignored if @buf is NULL.
305 * On success, a pointer to a buffer backing the parsed lines is stored
306 * here. If @buf is not NULL, this will be @buf. Otherwise, this will be
307 * an allocated buffer that must be freed when finished with the lines.
309 * Specifications of allowed sections in the file. Each such specification
310 * consists of the name of the section (e.g. [ExclusionList], like in the
311 * INI file format), along with a pointer to the list of lines parsed for
312 * that section. Use an empty name to indicate the destination of lines
313 * not in any section.
315 * Length of @pos_sections array.
317 * LOAD_TEXT_FILE_REMOVE_QUOTES or 0.
319 * Optional callback to modify each line being read.
321 * Returns 0 on success or a positive error code on failure.
323 * Unknown sections are ignored (warning printed).
/*
 * Implementation outline: when no buffer is supplied the file at @path is
 * read with read_text_file_contents(); the text is then required to end with
 * a newline and is handed to parse_text_file().
 *
 * NOTE(review): some parameters and several statements (the branches on
 * 'pathmode', the error checks and the return) are not visible in the
 * reviewed text; confirm against the full source.
 */
326 do_load_text_file(const tchar *path,
327 tchar *buf, size_t buflen,
329 const struct text_file_section *pos_sections,
330 int num_pos_sections,
332 line_mangle_t mangle_line)
/* A NULL buffer means the data has to be loaded from the file at 'path'. */
335 bool pathmode = (buf == NULL);
338 ret = read_text_file_contents(path, &buf, &buflen);
342 /* Overwrite '\0' with '\n' to avoid special case of last line
343 * not terminated with '\n'. */
344 buf[buflen++] = T('\n');
/* parse_text_file() relies on the buffer ending with a newline. */
346 wimlib_assert(buflen > 0 && buf[buflen - 1] == T('\n'));
349 ret = parse_text_file(path, buf, buflen, pos_sections,
350 num_pos_sections, flags, mangle_line);
/* Release the per-section string arrays.  NOTE(review): presumably this
 * runs only when parsing failed; the guarding condition is not visible in
 * the reviewed text. */
352 for (int i = 0; i < num_pos_sections; i++)
353 FREE(pos_sections[i].strings->strings);