On success, select() and pselect() return the number of file descriptors contained in the three returned descriptor sets (that is, the total number of bits that are set in readfds, writefds, exceptfds) which may be zero if the timeout expires before anything interesting happens. On error, -1 is returned, and errno is set to indicate the error; the file descriptor sets are unmodified, and timeout becomes undefined.
成功返回三种类型的总的句柄数
超时返回0
异常返回-1
nfds
nfds should be set to the highest-numbered file descriptor in any of the three sets, plus 1. The indicated file descriptors in each set are checked, up to this limit (but see BUGS).
Three independent sets of file descriptors are watched.
The file descriptors listed in readfds will be watched to see if characters become available for reading (more precisely, to see if a read will not block; in particular, a file descriptor is also ready on end-of- file).
The file descriptors in writefds will be watched to see if space is available for write (though a large write may still block).
The file descriptors in exceptfds will be watched for exceptional conditions. (For examples of some exceptional conditions, see the discussion of POLLPRI in poll(2).)
/* * We can actually return ERESTARTSYS instead of EINTR, but I'd * like to be certain this leads to no problems. So I return * EINTR just for safety. * * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ intcore_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec *end_time) { fd_set_bits fds; void *bits; int ret, max_fds; unsignedint size; structfdtable *fdt; /* Allocate small arguments on the stack to save memory and be faster */ long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
ret = -EINVAL; if (n < 0) goto out_nofds;
/* max_fds can increase, so grab it once to avoid race */ rcu_read_lock(); fdt = files_fdtable(current->files); max_fds = fdt->max_fds; rcu_read_unlock(); if (n > max_fds) n = max_fds;
/* * We need 6 bitmaps (in/out/ex for both incoming and outgoing), * since we used fdset we need to allocate memory in units of * long-words. */ size = FDS_BYTES(n); bits = stack_fds; if (size > sizeof(stack_fds) / 6) { /* Not enough space in on-stack array; must use kmalloc */ ret = -ENOMEM; bits = kmalloc(6 * size, GFP_KERNEL); if (!bits) goto out_nofds; } fds.in = bits; fds.out = bits + size; fds.ex = bits + 2*size; fds.res_in = bits + 3*size; fds.res_out = bits + 4*size; fds.res_ex = bits + 5*size;
for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsignedlong in, out, ex, all_bits, bit = 1, mask, j; unsignedlong res_in = 0, res_out = 0, res_ex = 0;
in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; if (all_bits == 0) { i += BITS_PER_LONG; continue; }
for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { structfdf; if (i >= n) break; if (!(bit & all_bits)) continue; f = fdget(i); if (f.file) { conststructfile_operations *f_op; f_op = f.file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { wait_key_set(wait, in, out, bit); //核心函数 mask = (*f_op->poll)(f.file, wait); } fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; wait->_qproc = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; wait->_qproc = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; wait->_qproc = NULL; } } } if (res_in) *rinp = res_in; if (res_out) *routp = res_out; if (res_ex) *rexp = res_ex; cond_resched(); } wait->_qproc = NULL; if (retval || timed_out || signal_pending(current)) break; if (table.error) { retval = table.error; break; }
/* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to * pointer to the expiry value. */ if (end_time && !to) { expire = timespec_to_ktime(*end_time); to = &expire; }
if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; }
/* * Although this function is called under waitqueue lock, LOCK * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() * and is paired with set_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1;
/* * Perform the default wake up operation using a dummy * waitqueue. * * TODO: This is hacky but there currently is no interface to * pass in @sync. @sync is scheduled to be removed and once * that happens, wake_up_process() can be used directly. */ return default_wake_function(&dummy_wait, mode, sync, key); }
intdo_sys_poll(struct pollfd __user *ufds, unsignedint nfds, struct timespec *end_time) { structpoll_wqueuestable; int err = -EFAULT, fdcount, len, size; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; structpoll_list *consthead = (struct poll_list *)stack_pps; structpoll_list *walk = head; unsignedlong todo = nfds;
if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL;
len = min_t(unsignedint, nfds, N_STACK_PPS); for (;;) { walk->next = NULL; walk->len = len; if (!len) break;
/* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { pt->_qproc = NULL; timed_out = 1; }
if (end_time && !timed_out) slack = select_estimate_accuracy(end_time);
for (;;) { structpoll_list *walk;
for (walk = list; walk != NULL; walk = walk->next) { structpollfd * pfd, * pfd_end;
pfd = walk->entries; pfd_end = pfd + walk->len; for (; pfd != pfd_end; pfd++) { /* * Fish for events. If we found one, record it * and kill poll_table->_qproc, so we don't * needlessly register any other waiters after * this. They'll get immediately deregistered * when we break out and return. */ if (do_pollfd(pfd, pt)) { count++; pt->_qproc = NULL; } } } /* * All waiters have already been registered, so don't provide * a poll_table->_qproc to them on the next loop iteration. */ pt->_qproc = NULL; if (!count) { count = wait->error; if (signal_pending(current)) count = -EINTR; } if (count || timed_out) break;
/* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to * pointer to the expiry value. */ if (end_time && !to) { expire = timespec_to_ktime(*end_time); to = &expire; }
if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) timed_out = 1; } return count; }
/* * Fish for pollable events on the pollfd->fd file descriptor. We're only * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, * if pwait->_qproc is non-NULL. */ staticinlineunsignedintdo_pollfd(struct pollfd *pollfd, poll_table *pwait) { unsignedint mask; int fd;