6 * Copyright (C) 2014 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see https://www.gnu.org/licenses/.
33 #include "wimlib/encoding.h"
34 #include "wimlib/error.h"
35 #include "wimlib/file_io.h"
36 #include "wimlib/textfile.h"
37 #include "wimlib/util.h"
/*
 * Read data from standard input into a buffer that is grown with REALLOC()
 * as needed.  The number of bytes read is stored in @bufsize_ret.
 *
 * Returns WIMLIB_ERR_INVALID_PARAM when the buffer cannot be grown any
 * further, and WIMLIB_ERR_READ after reporting "Error reading stdin".
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (local declarations, cleanup on the error paths, the condition
 * guarding the read-error path, the store to @buf_ret and the success
 * return) -- confirm them against the complete file.
 */
40 stdin_get_contents(void **buf_ret, size_t *bufsize_ret)
/* Grow geometrically: double the current capacity and add 256 bytes. */
47 size_t new_capacity = (capacity * 2) + 256;
/* Give up if the new capacity did not increase (size_t arithmetic wrapped
 * around) or if the reallocation itself failed. */
50 if (new_capacity <= capacity ||
51 !(new_buf = REALLOC(buf, new_capacity))) {
52 ERROR("Too much data sent on stdin!");
54 return WIMLIB_ERR_INVALID_PARAM;
57 capacity = new_capacity;
/* Fill the unused tail of the buffer; fread() returns the number of bytes
 * it actually stored, which is added to the running total. */
58 filled += fread(&buf[filled], 1, capacity - filled, stdin);
/* A completely full buffer means more input may be pending, so loop to grow
 * the buffer and read again. */
59 } while (filled == capacity);
62 ERROR_WITH_ERRNO("Error reading stdin");
64 return WIMLIB_ERR_READ;
/* Report how many bytes were actually read (not the allocated capacity). */
67 *bufsize_ret = filled;
/*
 * Read the entire contents of the file at @path into a buffer allocated with
 * MALLOC().  The file size, as reported by fstat(), is stored in
 * @bufsize_ret.
 *
 * Visible error returns: WIMLIB_ERR_OPEN, WIMLIB_ERR_STAT and
 * WIMLIB_ERR_NOMEM.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (local declarations, the check on topen()'s result, closing the
 * descriptor, freeing the buffer on a read error, the store to @buf_ret and
 * the final return) -- confirm them against the complete file.
 */
72 read_file_contents(const tchar *path, void **buf_ret, size_t *bufsize_ret)
/* Open read-only; O_BINARY requests that the data be read unmodified. */
81 raw_fd = topen(path, O_RDONLY | O_BINARY);
83 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
84 return WIMLIB_ERR_OPEN;
/* The file size determines how large a buffer to allocate. */
86 if (fstat(raw_fd, &st)) {
87 ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
89 return WIMLIB_ERR_STAT;
/* Reject sizes that do not survive conversion to size_t, then allocate a
 * buffer of exactly the file's size.  Both failures are reported as
 * out-of-memory. */
91 if ((size_t)st.st_size != st.st_size ||
92 (buf = MALLOC(st.st_size)) == NULL)
95 ERROR("Not enough memory to read \"%"TS"\"", path);
96 return WIMLIB_ERR_NOMEM;
/* Wrap the raw descriptor in a filedes and read st.st_size bytes from it. */
99 filedes_init(&fd, raw_fd);
100 ret = full_read(&fd, buf, st.st_size);
105 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
111 *bufsize_ret = st.st_size;
/*
 * Convert a raw text buffer of @bufsize_raw bytes into a tchar string.
 *
 * The encoding is guessed from the first bytes of the buffer, and the data
 * starting at offset_raw is converted with either utf8_to_tstr() or
 * utf16le_to_tstr().  On the visible success path the converted string is
 * stored in @tstr_ret and its length, counted in tchars rather than bytes,
 * in @tstr_nchars_ret.
 *
 * NOTE(review): the lines that finish each detection test, set offset_raw,
 * choose between the two converters and check 'ret' are not visible in this
 * excerpt -- confirm them against the complete file.
 */
116 translate_text_buffer(const u8 *buf_raw, size_t bufsize_raw,
117 tchar **tstr_ret, size_t *tstr_nchars_ret)
125 /* Guess the encoding: UTF-8 or UTF-16LE. (Something weirder and you're
126 * out of luck, sorry...) */
/* NOTE(review): a leading 0xFF in a buffer of at least 2 bytes presumably
 * starts a UTF-16LE byte order mark (FF FE); the test of the second byte is
 * not visible here. */
127 if (bufsize_raw >= 2 &&
128 buf_raw[0] == 0xFF &&
/* NOTE(review): a first byte in the ASCII range in a buffer of at least 2
 * bytes; presumably part of a heuristic for UTF-16LE text without a byte
 * order mark -- the rest of the condition is not visible here. */
134 else if (bufsize_raw >= 2 &&
135 buf_raw[0] <= 0x7F &&
/* NOTE(review): EF BB in a buffer of at least 3 bytes presumably starts a
 * UTF-8 byte order mark (EF BB BF); the test of the third byte is not
 * visible here. */
141 else if (bufsize_raw >= 3 &&
142 buf_raw[0] == 0xEF &&
143 buf_raw[1] == 0xBB &&
/* Convert everything from offset_raw to the end of the buffer.  The
 * converters report the output size through bufsize_tstr. */
156 ret = utf8_to_tstr((const char *)(buf_raw + offset_raw),
157 bufsize_raw - offset_raw,
158 &buf_tstr, &bufsize_tstr);
160 ret = utf16le_to_tstr((const utf16lechar *)(buf_raw + offset_raw),
161 bufsize_raw - offset_raw,
162 &buf_tstr, &bufsize_tstr);
167 *tstr_ret = buf_tstr;
/* bufsize_tstr is divided by sizeof(tchar) to yield a character count. */
168 *tstr_nchars_ret = bufsize_tstr / sizeof(tchar);
/*
 * Append the pointer @str to the end of @list, enlarging the list's array
 * with REALLOC() when it is full.  Only the pointer is stored; the string
 * itself is not copied.
 *
 * WIMLIB_ERR_NOMEM is the visible error return.
 *
 * NOTE(review): the declaration of new_strings, the check of REALLOC()'s
 * result and the success return are not visible in this excerpt -- confirm
 * them against the complete file.
 */
173 string_list_append(struct string_list *list, tchar *str)
175 size_t num_alloc_strings = list->num_alloc_strings;
/* The array is full: grow it before storing the new entry. */
177 if (list->num_strings == num_alloc_strings) {
/* New capacity is the larger of 1.5x the old capacity and the old capacity
 * plus 4, so an empty or very small list still grows by a few slots. */
180 num_alloc_strings = max(num_alloc_strings * 3 / 2,
181 num_alloc_strings + 4);
182 new_strings = REALLOC(list->strings,
183 sizeof(list->strings[0]) * num_alloc_strings);
185 return WIMLIB_ERR_NOMEM;
/* The list's fields are updated only after the reallocation, so
 * list->strings keeps its previous value on the WIMLIB_ERR_NOMEM return. */
186 list->strings = new_strings;
187 list->num_alloc_strings = num_alloc_strings;
/* Store the pointer in the next free slot and bump the element count. */
189 list->strings[list->num_strings++] = str;
/*
 * Special values of 'current_section' in parse_text_file().  Both are
 * negative so that they can be told apart from a valid index into the
 * pos_sections array with a single 'current_section < 0' test.
 *
 * NOT_IN_SECTION is the initial state, before any section header is seen.
 * IN_UNKNOWN_SECTION is set when a section header is encountered, before its
 * name is compared against the allowed sections.
 */
193 #define NOT_IN_SECTION -1
194 #define IN_UNKNOWN_SECTION -2
/*
 * Parse the text in @buf (@buflen tchars) in place, one line at a time.
 *
 * Lines are delimited by '\n'.  Each line has leading and trailing
 * whitespace trimmed; empty lines and lines starting with ';' or '#' are
 * ignored.  A line of the form "[name]" starts a new section, whose name is
 * compared against the names in @pos_sections.  The visible code passes
 * other lines to the @mangle_line callback and appends them to the string
 * list of the current section.  Because lines are terminated by writing
 * into @buf, the stored strings point into @buf.
 *
 * @path is used in warning messages and is passed to @mangle_line.
 * Warnings are suppressed when LOAD_TEXT_FILE_NO_WARNINGS is set in @flags.
 *
 * NOTE(review): many lines of this function are not visible in this excerpt
 * (local declarations, initialization of line_begin and line_end, the
 * 'continue' statements, checks of 'ret', the final return) -- confirm the
 * details below against the complete file.
 */
197 parse_text_file(const tchar *path, tchar *buf, size_t buflen,
198 const struct text_file_section *pos_sections,
199 int num_pos_sections, int flags, line_mangle_t mangle_line)
201 int current_section = NOT_IN_SECTION;
202 bool have_named_sections = false;
/* Line numbers are 1-based; they are used in warnings and passed to the
 * callback. */
205 unsigned long line_no = 1;
/* Record whether at least one allowed section has a nonempty name. */
207 for (int i = 0; i < num_pos_sections; i++) {
208 if (*pos_sections[i].name)
209 have_named_sections = true;
/* One iteration per line.  The loop stops only when 'p' lands exactly on
 * the end of the buffer, so the buffer must end with a newline;
 * load_text_file() appends one before calling this function. */
214 for (p = buf; p != buf + buflen; p = nl + 1, line_no++) {
215 tchar *line_begin, *line_end;
/* Find the newline that ends the current line. */
219 nl = tmemchr(p, T('\n'), buf + buflen - p);
226 /* Ignore leading whitespace. */
227 while (line_begin < nl && istspace(*line_begin))
230 /* Ignore trailing whitespace. */
231 while (line_end > line_begin && istspace(*(line_end - 1)))
234 line_len = line_end - line_begin;
236 /* Ignore comments and empty lines. */
237 if (line_len == 0 || *line_begin == T(';') || *line_begin == T('#'))
/* Terminate the trimmed line in place so it can be used as a string. */
240 line_begin[line_len] = T('\0');
242 /* Check for beginning of new section. */
243 if (line_begin[0] == T('[') &&
244 line_begin[line_len - 1] == T(']') &&
/* Temporarily overwrite the closing ']' with a terminator so that the text
 * after '[' can be compared directly against each allowed section name. */
247 line_begin[line_len - 1] = T('\0');
248 current_section = IN_UNKNOWN_SECTION;
249 for (int i = 0; i < num_pos_sections; i++) {
250 if (!tstrcmp(line_begin + 1,
251 pos_sections[i].name))
/* Put the ']' back so the warning below prints the header as written. */
257 line_begin[line_len - 1] = T(']');
258 if (current_section < 0) {
259 if (!(flags & LOAD_TEXT_FILE_NO_WARNINGS)) {
260 WARNING("%"TS":%lu: Unrecognized section \"%"TS"\"",
261 path, line_no, line_begin);
/* A regular line reached while not inside a recognized section.
 * NOTE(review): the statements deciding whether the line is then skipped or
 * assigned to a section are not visible here. */
267 if (current_section < 0) {
268 if (current_section == NOT_IN_SECTION) {
269 if (!(flags & LOAD_TEXT_FILE_NO_WARNINGS)) {
270 WARNING("%"TS":%lu: Not in a bracketed section!",
/* Optional quote handling: applies to a line that starts with a single or
 * double quote and ends with the same quote character, after which the line
 * is re-terminated.  NOTE(review): the statements that adjust line_begin
 * and line_len are not visible here. */
277 if (flags & LOAD_TEXT_FILE_REMOVE_QUOTES) {
278 if (line_begin[0] == T('"') || line_begin[0] == T('\'')) {
279 tchar quote = line_begin[0];
281 line_begin[line_len - 1] == quote)
285 line_begin[line_len] = T('\0');
/* Call the @mangle_line callback on the line.  NOTE(review): the callback
 * is documented as optional; any NULL check is not visible here. */
291 ret = (*mangle_line)(line_begin, path, line_no);
/* Append the line to the string list of the current section. */
296 ret = string_list_append(pos_sections[current_section].strings,
307 * Read and parse lines from a text file given as an on-disk file, standard
308 * input, or a buffer. The file may contain sections, like in an INI file.
311 * If @buf is NULL, then either the path to the file on-disk to read, or
312 * NULL to read from standard input. Else, a dummy name for the buffer.
314 * If NULL, the data will be read from the @path file. Otherwise the data
315 * will be read from this buffer.
317 * Length of buffer in bytes; ignored if @buf is NULL.
319 * On success, a pointer to a buffer backing the parsed lines is stored
320 * here. This must be freed after the parsed lines are done being used.
322 * Specifications of allowed sections in the file. Each such specification
323 * consists of the name of the section (e.g. [ExclusionList], like in the
324 * INI file format), along with a pointer to the list of lines parsed for
325 * that section. Use an empty name to indicate the destination of lines
326 * not in any section. Each list must be initialized to empty.
328 * Number of entries in the @pos_sections array.
330 * Flags: LOAD_TEXT_FILE_* flags.
332 * Optional callback to validate and/or modify each line being read.
334 * Returns 0 on success; nonzero on failure.
336 * Unknown sections are ignored, but a warning is printed for each, unless
337 * LOAD_TEXT_FILE_NO_WARNINGS is specified.
/*
 * Implementation of the loader described above: obtain the raw data (from
 * @buf, from the file at @path, or from standard input), translate it to a
 * tchar string, append a newline, and parse it into the lists of
 * @pos_sections.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (one line of the parameter list, local declarations, the branches
 * choosing the data source, checks of 'ret', freeing of temporary buffers
 * and the final return) -- confirm them against the complete file.
 */
340 load_text_file(const tchar *path, const void *buf, size_t bufsize,
342 const struct text_file_section *pos_sections,
343 int num_pos_sections,
344 int flags, line_mangle_t mangle_line)
/* No caller-supplied buffer means the data must be loaded here: from
 * standard input when @path is also NULL, otherwise from @path. */
347 bool is_filemode = (buf == NULL);
348 bool is_stdin = (is_filemode && path == NULL);
/* Reading from standard input must be explicitly permitted by the caller. */
352 if (is_stdin && !(flags & LOAD_TEXT_FILE_ALLOW_STDIN))
353 return WIMLIB_ERR_INVALID_PARAM;
/* In file mode, 'buf' and 'bufsize' are overwritten with the loaded data. */
357 ret = stdin_get_contents((void **)&buf, &bufsize);
359 ret = read_file_contents(path, (void **)&buf, &bufsize);
/* Convert the raw bytes to a tchar string and get its length in tchars. */
364 ret = translate_text_buffer(buf, bufsize, &tstr, &tstr_nchars);
/* Append a newline so that the last line is terminated like every other.
 * NOTE(review): this writes at index tstr_nchars, one element past the
 * reported length; it relies on the translated buffer having room there
 * (presumably the slot of a terminating null character) -- confirm. */
370 tstr[tstr_nchars++] = T('\n');
/* Use a placeholder name in diagnostics when the data came from stdin. */
372 ret = parse_text_file(is_stdin ? T("<stdin>") : path,
373 tstr, tstr_nchars, pos_sections,
374 num_pos_sections, flags, mangle_line);
/* Release the string array of every section's list.  NOTE(review):
 * presumably only on the failure path; the guarding condition is not
 * visible here. */
376 for (int i = 0; i < num_pos_sections; i++)
377 FREE(pos_sections[i].strings->strings);
386 /* API function documented in wimlib.h */
/*
 * Load a text file and return its contents as a tchar string in @tstr_ret,
 * with the length in tchars in @tstr_nchars_ret.
 *
 * A NULL @path or the path "-" selects standard input; any other path is
 * read from disk.
 *
 * NOTE(review): the local declarations, the checks of 'ret', the freeing of
 * the raw buffer and the final return are not visible in this excerpt --
 * confirm them against the complete file.
 */
388 wimlib_load_text_file(const tchar *path,
389 tchar **tstr_ret, size_t *tstr_nchars_ret)
/* "-" is accepted as an explicit name for standard input. */
395 if (path == NULL || (path[0] == '-' && path[1] == '\0'))
396 ret = stdin_get_contents(&buf, &bufsize);
398 ret = read_file_contents(path, &buf, &bufsize);
/* Translate the raw bytes; the result goes straight to the caller's output
 * parameters. */
401 ret = translate_text_buffer(buf, bufsize, tstr_ret, tstr_nchars_ret);