• R/O
  • SSH
  • HTTPS

yash: Commit


Commit MetaInfo

Revision: 4010 (tree)
Time: 2019-12-22 17:00:13
Author: magicant

Log Message

Optimize read when stdin is a regular file

POSIX requires that the shell not read too much from the standard
input. For this reason, reading from the standard input is not buffered
by default. Without buffering, the "read" syscall is called for each
byte to be read, which may degrade the performance of the shell.

After this commit, when the standard input is a regular file, the input
is buffered to improve the performance. After reading, the file
descriptor is repositioned with lseek, moving back over any bytes that
were buffered but not consumed, so that it works as if there had been no
buffering.

Change Summary

Incremental Difference

--- yash/trunk/input.c (revision 4009)
+++ yash/trunk/input.c (revision 4010)
@@ -1,6 +1,6 @@
11 /* Yash: yet another shell */
22 /* input.c: functions for input of command line */
3-/* (C) 2007-2018 magicant */
3+/* (C) 2007-2019 magicant */
44
55 /* This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@
2828 #include <stdio.h>
2929 #include <stdlib.h>
3030 #include <string.h>
31+#include <sys/stat.h>
3132 #include <unistd.h>
3233 #include <wchar.h>
3334 #include <wctype.h>
@@ -51,6 +52,10 @@
5152 #endif
5253
5354
55+static bool is_seekable_file(int fd);
56+static inputresult_T optimized_read_input(
57+ struct xwcsbuf_T *buf, struct input_file_info_T *info, _Bool trap)
58+ __attribute__((nonnull));
5459 static wchar_t *expand_prompt_variable(wchar_t num, wchar_t suffix)
5560 __attribute__((malloc,warn_unused_result));
5661 static const wchar_t *get_prompt_variable(wchar_t num, wchar_t suffix)
@@ -104,6 +109,9 @@
104109 inputresult_T read_input(
105110 xwcsbuf_T *buf, struct input_file_info_T *info, bool trap)
106111 {
112+ if (info->bufsize == 1 && is_seekable_file(info->fd))
113+ return optimized_read_input(buf, info, trap);
114+
107115 size_t initlen = buf->length;
108116 inputresult_T status = INPUT_EOF;
109117
@@ -176,6 +184,50 @@
176184 return status;
177185 }
178186
187+/* Checks if the file descriptor is seekable. */
188+bool is_seekable_file(int fd)
189+{
190+ struct stat st;
191+ return (fstat(fd, &st) == 0) && S_ISREG(st.st_mode);
192+ /* The result of lseek for an unseekable FD is implementation-defined, so we
193+ * should not assume such lseek to fail. We only assume a regular file is
194+ * always seekable. */
195+}
196+
197+/* Works like `read_input', but improves performance by reading many bytes at
198+ * once even if `info->bufsize' is 1. The input file descriptor must be
199+ * seekable. */
200+inputresult_T optimized_read_input(
201+ struct xwcsbuf_T *buf, struct input_file_info_T *info, _Bool trap)
202+{
203+ struct input_file_info_T *tmpinfo =
204+ xmallocs(sizeof *tmpinfo, BUFSIZ, sizeof *tmpinfo->buf);
205+ tmpinfo->fd = info->fd;
206+ tmpinfo->state = info->state;
207+ tmpinfo->bufpos = tmpinfo->bufmax = 0;
208+ tmpinfo->bufsize = BUFSIZ;
209+
210+ while (info->bufpos < info->bufmax)
211+ tmpinfo->buf[tmpinfo->bufmax++] = info->buf[info->bufpos++];
212+
213+ inputresult_T result = read_input(buf, tmpinfo, trap);
214+
215+ if (tmpinfo->bufpos < tmpinfo->bufmax) {
216+ /* rewind the FD to pretend we're not buffering */
217+ off_t diff = tmpinfo->bufmax - tmpinfo->bufpos;
218+ if (lseek(tmpinfo->fd, -diff, SEEK_CUR) == (off_t) -1) {
219+ xerror(errno,
220+ Ngt("cannot rewind file descriptor %d after reading. "
221+ "Subsequent reads may lack some text"),
222+ tmpinfo->fd);
223+ }
224+ }
225+
226+ info->state = tmpinfo->state;
227+ free(tmpinfo);
228+ return result;
229+}
230+
179231 /* An input function that prints a prompt and reads input.
180232 * `inputinfo' is a pointer to a `struct input_interactive_info'.
181233 * `inputinfo->type' must be either 1 or 2, which specifies the prompt type.
Show on old repository browser