fs/pipe.c


DEFINITIONS

This source file includes the following definitions.
  1. pipe_lock_nested
  2. pipe_lock
  3. pipe_unlock
  4. __pipe_lock
  5. __pipe_unlock
  6. pipe_double_lock
  7. pipe_wait
  8. anon_pipe_buf_release
  9. generic_pipe_buf_steal
  10. generic_pipe_buf_get
  11. generic_pipe_buf_confirm
  12. generic_pipe_buf_release
  13. pipe_read
  14. is_packetized
  15. pipe_write
  16. pipe_ioctl
  17. pipe_poll
  18. put_pipe_info
  19. pipe_release
  20. pipe_fasync
  21. alloc_pipe_info
  22. free_pipe_info
  23. pipefs_dname
  24. get_pipe_inode
  25. create_pipe_files
  26. __do_pipe_flags
  27. do_pipe_flags
  28. SYSCALL_DEFINE2
  29. SYSCALL_DEFINE1
  30. wait_for_partner
  31. wake_up_partner
  32. fifo_open
  33. pipe_set_size
  34. round_pipe_size
  35. pipe_proc_fn
  36. get_pipe_info
  37. pipe_fcntl
  38. pipefs_mount
  39. init_pipe_fs

   1 /*
   2  *  linux/fs/pipe.c
   3  *
   4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
   5  */
   6 
   7 #include <linux/mm.h>
   8 #include <linux/file.h>
   9 #include <linux/poll.h>
  10 #include <linux/slab.h>
  11 #include <linux/module.h>
  12 #include <linux/init.h>
  13 #include <linux/fs.h>
  14 #include <linux/log2.h>
  15 #include <linux/mount.h>
  16 #include <linux/magic.h>
  17 #include <linux/pipe_fs_i.h>
  18 #include <linux/uio.h>
  19 #include <linux/highmem.h>
  20 #include <linux/pagemap.h>
  21 #include <linux/audit.h>
  22 #include <linux/syscalls.h>
  23 #include <linux/fcntl.h>
  24 
  25 #include <asm/uaccess.h>
  26 #include <asm/ioctls.h>
  27 
  28 #include "internal.h"
  29 
  30 /*
  31  * The max size that a non-root user is allowed to grow the pipe. Can
  32  * be set by root in /proc/sys/fs/pipe-max-size
  33  */
  34 unsigned int pipe_max_size = 1048576;
  35 
  36 /*
  37  * Minimum pipe size, as required by POSIX
  38  */
  39 unsigned int pipe_min_size = PAGE_SIZE;
  40 
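The limit above is exported at /proc/sys/fs/pipe-max-size. As a minimal userspace sketch (illustrative only, not part of fs/pipe.c), the current value can be read like this; root may raise it by writing the same file or via sysctl fs.pipe-max-size:

/* Illustrative only: read the current fs.pipe-max-size limit. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/fs/pipe-max-size", "r");
        unsigned int max;

        if (!f || fscanf(f, "%u", &max) != 1) {
                perror("pipe-max-size");
                return 1;
        }
        fclose(f);
        printf("unprivileged pipes may grow to %u bytes\n", max);
        return 0;
}
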
  41 /*
  42  * We use a start+len construction, which provides full use of the 
  43  * allocated memory.
  44  * -- Florian Coosmann (FGC)
  45  * 
  46  * Reads with count = 0 should always return 0.
  47  * -- Julian Bradfield 1999-06-07.
  48  *
  49  * FIFOs and Pipes now generate SIGIO for both readers and writers.
  50  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
  51  *
  52  * pipe_read & write cleanup
  53  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  54  */
  55 
  56 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
  57 {
  58         if (pipe->files)
  59                 mutex_lock_nested(&pipe->mutex, subclass);
  60 }
  61 
  62 void pipe_lock(struct pipe_inode_info *pipe)
  63 {
  64         /*
  65          * pipe_lock() nests non-pipe inode locks (for writing to a file)
  66          */
  67         pipe_lock_nested(pipe, I_MUTEX_PARENT);
  68 }
  69 EXPORT_SYMBOL(pipe_lock);
  70 
  71 void pipe_unlock(struct pipe_inode_info *pipe)
  72 {
  73         if (pipe->files)
  74                 mutex_unlock(&pipe->mutex);
  75 }
  76 EXPORT_SYMBOL(pipe_unlock);
  77 
  78 static inline void __pipe_lock(struct pipe_inode_info *pipe)
  79 {
  80         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
  81 }
  82 
  83 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
  84 {
  85         mutex_unlock(&pipe->mutex);
  86 }
  87 
  88 void pipe_double_lock(struct pipe_inode_info *pipe1,
  89                       struct pipe_inode_info *pipe2)
  90 {
  91         BUG_ON(pipe1 == pipe2);
  92 
  93         if (pipe1 < pipe2) {
  94                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
  95                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
  96         } else {
  97                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
  98                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
  99         }
 100 }
 101 
 102 /* Drop the inode semaphore and wait for a pipe event, atomically */
 103 void pipe_wait(struct pipe_inode_info *pipe)
 104 {
 105         DEFINE_WAIT(wait);
 106 
 107         /*
 108          * Pipes are system-local resources, so sleeping on them
 109          * is considered a noninteractive wait:
 110          */
 111         prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
 112         pipe_unlock(pipe);
 113         schedule();
 114         finish_wait(&pipe->wait, &wait);
 115         pipe_lock(pipe);
 116 }
 117 
 118 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 119                                   struct pipe_buffer *buf)
 120 {
 121         struct page *page = buf->page;
 122 
 123         /*
 124          * If nobody else uses this page, and we don't already have a
 125          * temporary page, let's keep track of it as a one-deep
 126          * allocation cache. (Otherwise just release our reference to it)
 127          */
 128         if (page_count(page) == 1 && !pipe->tmp_page)
 129                 pipe->tmp_page = page;
 130         else
 131                 page_cache_release(page);
 132 }
 133 
 134 /**
 135  * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 136  * @pipe:       the pipe that the buffer belongs to
 137  * @buf:        the buffer to attempt to steal
 138  *
 139  * Description:
 140  *      This function attempts to steal the &struct page attached to
 141  *      @buf. If successful, this function returns 0 and returns with
 142  *      the page locked. The caller may then reuse the page for whatever
 143  *      he wishes; the typical use is insertion into a different file
 144  *      page cache.
 145  */
 146 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
 147                            struct pipe_buffer *buf)
 148 {
 149         struct page *page = buf->page;
 150 
 151         /*
 152          * A reference of one is golden, that means that the owner of this
 153          * page is the only one holding a reference to it. lock the page
 154          * and return OK.
 155          */
 156         if (page_count(page) == 1) {
 157                 lock_page(page);
 158                 return 0;
 159         }
 160 
 161         return 1;
 162 }
 163 EXPORT_SYMBOL(generic_pipe_buf_steal);
 164 
 165 /**
 166  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 167  * @pipe:       the pipe that the buffer belongs to
 168  * @buf:        the buffer to get a reference to
 169  *
 170  * Description:
 171  *      This function grabs an extra reference to @buf. It's used in
  172  *      the tee() system call, when we duplicate the buffers in one
 173  *      pipe into another.
 174  */
 175 void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 176 {
 177         page_cache_get(buf->page);
 178 }
 179 EXPORT_SYMBOL(generic_pipe_buf_get);
 180 
 181 /**
 182  * generic_pipe_buf_confirm - verify contents of the pipe buffer
 183  * @info:       the pipe that the buffer belongs to
 184  * @buf:        the buffer to confirm
 185  *
 186  * Description:
 187  *      This function does nothing, because the generic pipe code uses
 188  *      pages that are always good when inserted into the pipe.
 189  */
 190 int generic_pipe_buf_confirm(struct pipe_inode_info *info,
 191                              struct pipe_buffer *buf)
 192 {
 193         return 0;
 194 }
 195 EXPORT_SYMBOL(generic_pipe_buf_confirm);
 196 
 197 /**
 198  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 199  * @pipe:       the pipe that the buffer belongs to
 200  * @buf:        the buffer to put a reference to
 201  *
 202  * Description:
 203  *      This function releases a reference to @buf.
 204  */
 205 void generic_pipe_buf_release(struct pipe_inode_info *pipe,
 206                               struct pipe_buffer *buf)
 207 {
 208         page_cache_release(buf->page);
 209 }
 210 EXPORT_SYMBOL(generic_pipe_buf_release);
 211 
 212 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 213         .can_merge = 1,
 214         .confirm = generic_pipe_buf_confirm,
 215         .release = anon_pipe_buf_release,
 216         .steal = generic_pipe_buf_steal,
 217         .get = generic_pipe_buf_get,
 218 };
 219 
 220 static const struct pipe_buf_operations packet_pipe_buf_ops = {
 221         .can_merge = 0,
 222         .confirm = generic_pipe_buf_confirm,
 223         .release = anon_pipe_buf_release,
 224         .steal = generic_pipe_buf_steal,
 225         .get = generic_pipe_buf_get,
 226 };
 227 
 228 static ssize_t
 229 pipe_read(struct kiocb *iocb, struct iov_iter *to)
 230 {
 231         size_t total_len = iov_iter_count(to);
 232         struct file *filp = iocb->ki_filp;
 233         struct pipe_inode_info *pipe = filp->private_data;
 234         int do_wakeup;
 235         ssize_t ret;
 236 
 237         /* Null read succeeds. */
 238         if (unlikely(total_len == 0))
 239                 return 0;
 240 
 241         do_wakeup = 0;
 242         ret = 0;
 243         __pipe_lock(pipe);
 244         for (;;) {
 245                 int bufs = pipe->nrbufs;
 246                 if (bufs) {
 247                         int curbuf = pipe->curbuf;
 248                         struct pipe_buffer *buf = pipe->bufs + curbuf;
 249                         const struct pipe_buf_operations *ops = buf->ops;
 250                         size_t chars = buf->len;
 251                         size_t written;
 252                         int error;
 253 
 254                         if (chars > total_len)
 255                                 chars = total_len;
 256 
 257                         error = ops->confirm(pipe, buf);
 258                         if (error) {
 259                                 if (!ret)
 260                                         ret = error;
 261                                 break;
 262                         }
 263 
 264                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
 265                         if (unlikely(written < chars)) {
 266                                 if (!ret)
 267                                         ret = -EFAULT;
 268                                 break;
 269                         }
 270                         ret += chars;
 271                         buf->offset += chars;
 272                         buf->len -= chars;
 273 
 274                         /* Was it a packet buffer? Clean up and exit */
 275                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
 276                                 total_len = chars;
 277                                 buf->len = 0;
 278                         }
 279 
 280                         if (!buf->len) {
 281                                 buf->ops = NULL;
 282                                 ops->release(pipe, buf);
 283                                 curbuf = (curbuf + 1) & (pipe->buffers - 1);
 284                                 pipe->curbuf = curbuf;
 285                                 pipe->nrbufs = --bufs;
 286                                 do_wakeup = 1;
 287                         }
 288                         total_len -= chars;
 289                         if (!total_len)
 290                                 break;  /* common path: read succeeded */
 291                 }
 292                 if (bufs)       /* More to do? */
 293                         continue;
 294                 if (!pipe->writers)
 295                         break;
 296                 if (!pipe->waiting_writers) {
 297                         /* syscall merging: Usually we must not sleep
 298                          * if O_NONBLOCK is set, or if we got some data.
 299                          * But if a writer sleeps in kernel space, then
 300                          * we can wait for that data without violating POSIX.
 301                          */
 302                         if (ret)
 303                                 break;
 304                         if (filp->f_flags & O_NONBLOCK) {
 305                                 ret = -EAGAIN;
 306                                 break;
 307                         }
 308                 }
 309                 if (signal_pending(current)) {
 310                         if (!ret)
 311                                 ret = -ERESTARTSYS;
 312                         break;
 313                 }
 314                 if (do_wakeup) {
 315                         wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
 316                         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 317                 }
 318                 pipe_wait(pipe);
 319         }
 320         __pipe_unlock(pipe);
 321 
 322         /* Signal writers asynchronously that there is more room. */
 323         if (do_wakeup) {
 324                 wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);
 325                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 326         }
 327         if (ret > 0)
 328                 file_accessed(filp);
 329         return ret;
 330 }
 331 
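A short userspace sketch (illustrative only, not part of fs/pipe.c) of the read-side behaviour implemented above: an empty pipe opened with O_NONBLOCK fails with EAGAIN while a writer still exists, and returns 0 (EOF) once the last writer is gone. pipe2() needs _GNU_SOURCE with glibc.

/* Illustrative only: read-end behaviour of an empty pipe. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];
        char c;

        if (pipe2(fds, O_NONBLOCK) < 0)
                return 1;

        /* Empty pipe, write end still open: -1 with errno == EAGAIN. */
        if (read(fds[0], &c, 1) < 0 && errno == EAGAIN)
                puts("empty pipe, O_NONBLOCK: EAGAIN");

        /* Close the only writer: subsequent reads return 0 (EOF). */
        close(fds[1]);
        if (read(fds[0], &c, 1) == 0)
                puts("no writers left: EOF");

        close(fds[0]);
        return 0;
}
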
 332 static inline int is_packetized(struct file *file)
 333 {
 334         return (file->f_flags & O_DIRECT) != 0;
 335 }
 336 
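O_DIRECT, checked by is_packetized() above, selects the packet_pipe_buf_ops buffers (available since Linux 3.4). A small sketch (illustrative only, not part of fs/pipe.c): each write() becomes one packet and each read() returns at most one packet.

/* Illustrative only: packetized (O_DIRECT) pipe behaviour. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];
        char buf[64];
        ssize_t n;

        if (pipe2(fds, O_DIRECT) < 0)
                return 1;

        write(fds[1], "first", 5);      /* one packet      */
        write(fds[1], "second", 6);     /* a second packet */

        /* Reads stop at packet boundaries instead of concatenating. */
        n = read(fds[0], buf, sizeof(buf));
        printf("read %zd bytes: %.*s\n", n, (int)n, buf);   /* 5, "first"  */
        n = read(fds[0], buf, sizeof(buf));
        printf("read %zd bytes: %.*s\n", n, (int)n, buf);   /* 6, "second" */

        close(fds[0]);
        close(fds[1]);
        return 0;
}
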
 337 static ssize_t
 338 pipe_write(struct kiocb *iocb, struct iov_iter *from)
 339 {
 340         struct file *filp = iocb->ki_filp;
 341         struct pipe_inode_info *pipe = filp->private_data;
 342         ssize_t ret = 0;
 343         int do_wakeup = 0;
 344         size_t total_len = iov_iter_count(from);
 345         ssize_t chars;
 346 
 347         /* Null write succeeds. */
 348         if (unlikely(total_len == 0))
 349                 return 0;
 350 
 351         __pipe_lock(pipe);
 352 
 353         if (!pipe->readers) {
 354                 send_sig(SIGPIPE, current, 0);
 355                 ret = -EPIPE;
 356                 goto out;
 357         }
 358 
 359         /* We try to merge small writes */
 360         chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
 361         if (pipe->nrbufs && chars != 0) {
 362                 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
 363                                                         (pipe->buffers - 1);
 364                 struct pipe_buffer *buf = pipe->bufs + lastbuf;
 365                 const struct pipe_buf_operations *ops = buf->ops;
 366                 int offset = buf->offset + buf->len;
 367 
 368                 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
 369                         ret = ops->confirm(pipe, buf);
 370                         if (ret)
 371                                 goto out;
 372 
 373                         ret = copy_page_from_iter(buf->page, offset, chars, from);
 374                         if (unlikely(ret < chars)) {
 375                                 ret = -EFAULT;
 376                                 goto out;
 377                         }
 378                         do_wakeup = 1;
 379                         buf->len += ret;
 380                         if (!iov_iter_count(from))
 381                                 goto out;
 382                 }
 383         }
 384 
 385         for (;;) {
 386                 int bufs;
 387 
 388                 if (!pipe->readers) {
 389                         send_sig(SIGPIPE, current, 0);
 390                         if (!ret)
 391                                 ret = -EPIPE;
 392                         break;
 393                 }
 394                 bufs = pipe->nrbufs;
 395                 if (bufs < pipe->buffers) {
 396                         int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
 397                         struct pipe_buffer *buf = pipe->bufs + newbuf;
 398                         struct page *page = pipe->tmp_page;
 399                         int copied;
 400 
 401                         if (!page) {
 402                                 page = alloc_page(GFP_HIGHUSER);
 403                                 if (unlikely(!page)) {
 404                                         ret = ret ? : -ENOMEM;
 405                                         break;
 406                                 }
 407                                 pipe->tmp_page = page;
 408                         }
 409                         /* Always wake up, even if the copy fails. Otherwise
 410                          * we lock up (O_NONBLOCK-)readers that sleep due to
 411                          * syscall merging.
 412                          * FIXME! Is this really true?
 413                          */
 414                         do_wakeup = 1;
 415                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
 416                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
 417                                 if (!ret)
 418                                         ret = -EFAULT;
 419                                 break;
 420                         }
 421                         ret += copied;
 422 
 423                         /* Insert it into the buffer array */
 424                         buf->page = page;
 425                         buf->ops = &anon_pipe_buf_ops;
 426                         buf->offset = 0;
 427                         buf->len = copied;
 428                         buf->flags = 0;
 429                         if (is_packetized(filp)) {
 430                                 buf->ops = &packet_pipe_buf_ops;
 431                                 buf->flags = PIPE_BUF_FLAG_PACKET;
 432                         }
 433                         pipe->nrbufs = ++bufs;
 434                         pipe->tmp_page = NULL;
 435 
 436                         if (!iov_iter_count(from))
 437                                 break;
 438                 }
 439                 if (bufs < pipe->buffers)
 440                         continue;
 441                 if (filp->f_flags & O_NONBLOCK) {
 442                         if (!ret)
 443                                 ret = -EAGAIN;
 444                         break;
 445                 }
 446                 if (signal_pending(current)) {
 447                         if (!ret)
 448                                 ret = -ERESTARTSYS;
 449                         break;
 450                 }
 451                 if (do_wakeup) {
 452                         wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 453                         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 454                         do_wakeup = 0;
 455                 }
 456                 pipe->waiting_writers++;
 457                 pipe_wait(pipe);
 458                 pipe->waiting_writers--;
 459         }
 460 out:
 461         __pipe_unlock(pipe);
 462         if (do_wakeup) {
 463                 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 464                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 465         }
 466         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
 467                 int err = file_update_time(filp);
 468                 if (err)
 469                         ret = err;
 470                 sb_end_write(file_inode(filp)->i_sb);
 471         }
 472         return ret;
 473 }
 474 
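The writer side above sends SIGPIPE and fails with EPIPE once every reader has gone away. A userspace sketch (illustrative only, not part of fs/pipe.c) that turns the signal into a visible error code:

/* Illustrative only: writing to a pipe with no readers yields EPIPE. */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0)
                return 1;

        signal(SIGPIPE, SIG_IGN);       /* otherwise the process is killed */
        close(fds[0]);                  /* drop the only read end          */

        if (write(fds[1], "x", 1) < 0 && errno == EPIPE)
                puts("no readers: write failed with EPIPE");

        close(fds[1]);
        return 0;
}
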
 475 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 476 {
 477         struct pipe_inode_info *pipe = filp->private_data;
 478         int count, buf, nrbufs;
 479 
 480         switch (cmd) {
 481                 case FIONREAD:
 482                         __pipe_lock(pipe);
 483                         count = 0;
 484                         buf = pipe->curbuf;
 485                         nrbufs = pipe->nrbufs;
 486                         while (--nrbufs >= 0) {
 487                                 count += pipe->bufs[buf].len;
 488                                 buf = (buf+1) & (pipe->buffers - 1);
 489                         }
 490                         __pipe_unlock(pipe);
 491 
 492                         return put_user(count, (int __user *)arg);
 493                 default:
 494                         return -ENOIOCTLCMD;
 495         }
 496 }
 497 
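FIONREAD, the only command handled by pipe_ioctl() above, reports how many bytes are currently queued in the pipe. A minimal sketch (illustrative only, not part of fs/pipe.c):

/* Illustrative only: FIONREAD returns the number of unread bytes. */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        int fds[2], count = 0;

        if (pipe(fds) < 0)
                return 1;

        write(fds[1], "hello", 5);
        if (ioctl(fds[0], FIONREAD, &count) == 0)
                printf("%d bytes waiting in the pipe\n", count);   /* 5 */

        close(fds[0]);
        close(fds[1]);
        return 0;
}
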
 498 /* No kernel lock held - fine */
 499 static unsigned int
 500 pipe_poll(struct file *filp, poll_table *wait)
 501 {
 502         unsigned int mask;
 503         struct pipe_inode_info *pipe = filp->private_data;
 504         int nrbufs;
 505 
 506         poll_wait(filp, &pipe->wait, wait);
 507 
 508         /* Reading only -- no need for acquiring the semaphore.  */
 509         nrbufs = pipe->nrbufs;
 510         mask = 0;
 511         if (filp->f_mode & FMODE_READ) {
 512                 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
 513                 if (!pipe->writers && filp->f_version != pipe->w_counter)
 514                         mask |= POLLHUP;
 515         }
 516 
 517         if (filp->f_mode & FMODE_WRITE) {
 518                 mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
 519                 /*
 520                  * Most Unices do not set POLLERR for FIFOs but on Linux they
 521                  * behave exactly like pipes for poll().
 522                  */
 523                 if (!pipe->readers)
 524                         mask |= POLLERR;
 525         }
 526 
 527         return mask;
 528 }
 529 
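A sketch of the poll semantics implemented above (illustrative only, not part of fs/pipe.c): the read end reports POLLIN while data is queued and POLLHUP once the last writer has closed.

/* Illustrative only: polling the read end of a pipe. */
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0)
                return 1;

        write(fds[1], "x", 1);
        close(fds[1]);                  /* last writer gone */

        struct pollfd pfd = { .fd = fds[0], .events = POLLIN };
        if (poll(&pfd, 1, 0) > 0)
                printf("POLLIN=%d POLLHUP=%d\n",
                       !!(pfd.revents & POLLIN), !!(pfd.revents & POLLHUP));

        close(fds[0]);
        return 0;
}
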
 530 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 531 {
 532         int kill = 0;
 533 
 534         spin_lock(&inode->i_lock);
 535         if (!--pipe->files) {
 536                 inode->i_pipe = NULL;
 537                 kill = 1;
 538         }
 539         spin_unlock(&inode->i_lock);
 540 
 541         if (kill)
 542                 free_pipe_info(pipe);
 543 }
 544 
 545 static int
 546 pipe_release(struct inode *inode, struct file *file)
 547 {
 548         struct pipe_inode_info *pipe = file->private_data;
 549 
 550         __pipe_lock(pipe);
 551         if (file->f_mode & FMODE_READ)
 552                 pipe->readers--;
 553         if (file->f_mode & FMODE_WRITE)
 554                 pipe->writers--;
 555 
 556         if (pipe->readers || pipe->writers) {
 557                 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
 558                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 559                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 560         }
 561         __pipe_unlock(pipe);
 562 
 563         put_pipe_info(inode, pipe);
 564         return 0;
 565 }
 566 
 567 static int
 568 pipe_fasync(int fd, struct file *filp, int on)
 569 {
 570         struct pipe_inode_info *pipe = filp->private_data;
 571         int retval = 0;
 572 
 573         __pipe_lock(pipe);
 574         if (filp->f_mode & FMODE_READ)
 575                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 576         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
 577                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
 578                 if (retval < 0 && (filp->f_mode & FMODE_READ))
 579                         /* this can happen only if on == T */
 580                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 581         }
 582         __pipe_unlock(pipe);
 583         return retval;
 584 }
 585 
 586 struct pipe_inode_info *alloc_pipe_info(void)
 587 {
 588         struct pipe_inode_info *pipe;
 589 
 590         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 591         if (pipe) {
 592                 pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
 593                 if (pipe->bufs) {
 594                         init_waitqueue_head(&pipe->wait);
 595                         pipe->r_counter = pipe->w_counter = 1;
 596                         pipe->buffers = PIPE_DEF_BUFFERS;
 597                         mutex_init(&pipe->mutex);
 598                         return pipe;
 599                 }
 600                 kfree(pipe);
 601         }
 602 
 603         return NULL;
 604 }
 605 
 606 void free_pipe_info(struct pipe_inode_info *pipe)
 607 {
 608         int i;
 609 
 610         for (i = 0; i < pipe->buffers; i++) {
 611                 struct pipe_buffer *buf = pipe->bufs + i;
 612                 if (buf->ops)
 613                         buf->ops->release(pipe, buf);
 614         }
 615         if (pipe->tmp_page)
 616                 __free_page(pipe->tmp_page);
 617         kfree(pipe->bufs);
 618         kfree(pipe);
 619 }
 620 
 621 static struct vfsmount *pipe_mnt __read_mostly;
 622 
 623 /*
 624  * pipefs_dname() is called from d_path().
 625  */
 626 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 627 {
 628         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 629                                 d_inode(dentry)->i_ino);
 630 }
 631 
 632 static const struct dentry_operations pipefs_dentry_operations = {
 633         .d_dname        = pipefs_dname,
 634 };
 635 
 636 static struct inode * get_pipe_inode(void)
 637 {
 638         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
 639         struct pipe_inode_info *pipe;
 640 
 641         if (!inode)
 642                 goto fail_inode;
 643 
 644         inode->i_ino = get_next_ino();
 645 
 646         pipe = alloc_pipe_info();
 647         if (!pipe)
 648                 goto fail_iput;
 649 
 650         inode->i_pipe = pipe;
 651         pipe->files = 2;
 652         pipe->readers = pipe->writers = 1;
 653         inode->i_fop = &pipefifo_fops;
 654 
 655         /*
 656          * Mark the inode dirty from the very beginning,
 657          * that way it will never be moved to the dirty
 658          * list because "mark_inode_dirty()" will think
 659          * that it already _is_ on the dirty list.
 660          */
 661         inode->i_state = I_DIRTY;
 662         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
 663         inode->i_uid = current_fsuid();
 664         inode->i_gid = current_fsgid();
 665         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 666 
 667         return inode;
 668 
 669 fail_iput:
 670         iput(inode);
 671 
 672 fail_inode:
 673         return NULL;
 674 }
 675 
 676 int create_pipe_files(struct file **res, int flags)
 677 {
 678         int err;
 679         struct inode *inode = get_pipe_inode();
 680         struct file *f;
 681         struct path path;
 682         static struct qstr name = { .name = "" };
 683 
 684         if (!inode)
 685                 return -ENFILE;
 686 
 687         err = -ENOMEM;
 688         path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
 689         if (!path.dentry)
 690                 goto err_inode;
 691         path.mnt = mntget(pipe_mnt);
 692 
 693         d_instantiate(path.dentry, inode);
 694 
 695         f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
 696         if (IS_ERR(f)) {
 697                 err = PTR_ERR(f);
 698                 goto err_dentry;
 699         }
 700 
 701         f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 702         f->private_data = inode->i_pipe;
 703 
 704         res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
 705         if (IS_ERR(res[0])) {
 706                 err = PTR_ERR(res[0]);
 707                 goto err_file;
 708         }
 709 
 710         path_get(&path);
 711         res[0]->private_data = inode->i_pipe;
 712         res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 713         res[1] = f;
 714         return 0;
 715 
 716 err_file:
 717         put_filp(f);
 718 err_dentry:
 719         free_pipe_info(inode->i_pipe);
 720         path_put(&path);
 721         return err;
 722 
 723 err_inode:
 724         free_pipe_info(inode->i_pipe);
 725         iput(inode);
 726         return err;
 727 }
 728 
 729 static int __do_pipe_flags(int *fd, struct file **files, int flags)
 730 {
 731         int error;
 732         int fdw, fdr;
 733 
 734         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
 735                 return -EINVAL;
 736 
 737         error = create_pipe_files(files, flags);
 738         if (error)
 739                 return error;
 740 
 741         error = get_unused_fd_flags(flags);
 742         if (error < 0)
 743                 goto err_read_pipe;
 744         fdr = error;
 745 
 746         error = get_unused_fd_flags(flags);
 747         if (error < 0)
 748                 goto err_fdr;
 749         fdw = error;
 750 
 751         audit_fd_pair(fdr, fdw);
 752         fd[0] = fdr;
 753         fd[1] = fdw;
 754         return 0;
 755 
 756  err_fdr:
 757         put_unused_fd(fdr);
 758  err_read_pipe:
 759         fput(files[0]);
 760         fput(files[1]);
 761         return error;
 762 }
 763 
 764 int do_pipe_flags(int *fd, int flags)
 765 {
 766         struct file *files[2];
 767         int error = __do_pipe_flags(fd, files, flags);
 768         if (!error) {
 769                 fd_install(fd[0], files[0]);
 770                 fd_install(fd[1], files[1]);
 771         }
 772         return error;
 773 }
 774 
 775 /*
 776  * sys_pipe() is the normal C calling standard for creating
 777  * a pipe. It's not the way Unix traditionally does this, though.
 778  */
 779 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
 780 {
 781         struct file *files[2];
 782         int fd[2];
 783         int error;
 784 
 785         error = __do_pipe_flags(fd, files, flags);
 786         if (!error) {
 787                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
 788                         fput(files[0]);
 789                         fput(files[1]);
 790                         put_unused_fd(fd[0]);
 791                         put_unused_fd(fd[1]);
 792                         error = -EFAULT;
 793                 } else {
 794                         fd_install(fd[0], files[0]);
 795                         fd_install(fd[1], files[1]);
 796                 }
 797         }
 798         return error;
 799 }
 800 
 801 SYSCALL_DEFINE1(pipe, int __user *, fildes)
 802 {
 803         return sys_pipe2(fildes, 0);
 804 }
 805 
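The classic use of the two system calls defined above, as a userspace sketch (illustrative only, not part of fs/pipe.c): fd[0] is the read end, fd[1] the write end, and both descriptors are shared across fork().

/* Illustrative only: parent writes into the pipe, child reads from it. */
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int fds[2];
        char buf[32];

        if (pipe(fds) < 0)
                return 1;

        if (fork() == 0) {              /* child: reader */
                close(fds[1]);
                ssize_t n = read(fds[0], buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        printf("child read: %s\n", buf);
                }
                _exit(0);
        }

        close(fds[0]);                  /* parent: writer */
        write(fds[1], "hello", 5);
        close(fds[1]);
        wait(NULL);
        return 0;
}
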
 806 static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 807 {
 808         int cur = *cnt; 
 809 
 810         while (cur == *cnt) {
 811                 pipe_wait(pipe);
 812                 if (signal_pending(current))
 813                         break;
 814         }
 815         return cur == *cnt ? -ERESTARTSYS : 0;
 816 }
 817 
 818 static void wake_up_partner(struct pipe_inode_info *pipe)
 819 {
 820         wake_up_interruptible(&pipe->wait);
 821 }
 822 
 823 static int fifo_open(struct inode *inode, struct file *filp)
 824 {
 825         struct pipe_inode_info *pipe;
 826         bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
 827         int ret;
 828 
 829         filp->f_version = 0;
 830 
 831         spin_lock(&inode->i_lock);
 832         if (inode->i_pipe) {
 833                 pipe = inode->i_pipe;
 834                 pipe->files++;
 835                 spin_unlock(&inode->i_lock);
 836         } else {
 837                 spin_unlock(&inode->i_lock);
 838                 pipe = alloc_pipe_info();
 839                 if (!pipe)
 840                         return -ENOMEM;
 841                 pipe->files = 1;
 842                 spin_lock(&inode->i_lock);
 843                 if (unlikely(inode->i_pipe)) {
 844                         inode->i_pipe->files++;
 845                         spin_unlock(&inode->i_lock);
 846                         free_pipe_info(pipe);
 847                         pipe = inode->i_pipe;
 848                 } else {
 849                         inode->i_pipe = pipe;
 850                         spin_unlock(&inode->i_lock);
 851                 }
 852         }
 853         filp->private_data = pipe;
 854         /* OK, we have a pipe and it's pinned down */
 855 
 856         __pipe_lock(pipe);
 857 
 858         /* We can only do regular read/write on fifos */
 859         filp->f_mode &= (FMODE_READ | FMODE_WRITE);
 860 
 861         switch (filp->f_mode) {
 862         case FMODE_READ:
 863         /*
 864          *  O_RDONLY
 865          *  POSIX.1 says that O_NONBLOCK means return with the FIFO
 866          *  opened, even when there is no process writing the FIFO.
 867          */
 868                 pipe->r_counter++;
 869                 if (pipe->readers++ == 0)
 870                         wake_up_partner(pipe);
 871 
 872                 if (!is_pipe && !pipe->writers) {
 873                         if ((filp->f_flags & O_NONBLOCK)) {
 874                                 /* suppress POLLHUP until we have
 875                                  * seen a writer */
 876                                 filp->f_version = pipe->w_counter;
 877                         } else {
 878                                 if (wait_for_partner(pipe, &pipe->w_counter))
 879                                         goto err_rd;
 880                         }
 881                 }
 882                 break;
 883         
 884         case FMODE_WRITE:
 885         /*
 886          *  O_WRONLY
 887          *  POSIX.1 says that O_NONBLOCK means return -1 with
 888          *  errno=ENXIO when there is no process reading the FIFO.
 889          */
 890                 ret = -ENXIO;
 891                 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
 892                         goto err;
 893 
 894                 pipe->w_counter++;
 895                 if (!pipe->writers++)
 896                         wake_up_partner(pipe);
 897 
 898                 if (!is_pipe && !pipe->readers) {
 899                         if (wait_for_partner(pipe, &pipe->r_counter))
 900                                 goto err_wr;
 901                 }
 902                 break;
 903         
 904         case FMODE_READ | FMODE_WRITE:
 905         /*
 906          *  O_RDWR
 907          *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
 908          *  This implementation will NEVER block on a O_RDWR open, since
 909          *  the process can at least talk to itself.
 910          */
 911 
 912                 pipe->readers++;
 913                 pipe->writers++;
 914                 pipe->r_counter++;
 915                 pipe->w_counter++;
 916                 if (pipe->readers == 1 || pipe->writers == 1)
 917                         wake_up_partner(pipe);
 918                 break;
 919 
 920         default:
 921                 ret = -EINVAL;
 922                 goto err;
 923         }
 924 
 925         /* Ok! */
 926         __pipe_unlock(pipe);
 927         return 0;
 928 
 929 err_rd:
 930         if (!--pipe->readers)
 931                 wake_up_interruptible(&pipe->wait);
 932         ret = -ERESTARTSYS;
 933         goto err;
 934 
 935 err_wr:
 936         if (!--pipe->writers)
 937                 wake_up_interruptible(&pipe->wait);
 938         ret = -ERESTARTSYS;
 939         goto err;
 940 
 941 err:
 942         __pipe_unlock(pipe);
 943 
 944         put_pipe_info(inode, pipe);
 945         return ret;
 946 }
 947 
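fifo_open() above also serves named pipes. A sketch of the O_NONBLOCK rules it implements (illustrative only, not part of fs/pipe.c; the path is an arbitrary example): a non-blocking read-only open succeeds with no writer present, while a non-blocking write-only open fails with ENXIO when nobody is reading.

/* Illustrative only: O_NONBLOCK open semantics of a FIFO. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/tmp/demo_fifo";    /* arbitrary example path */
        int fd;

        if (mkfifo(path, 0600) < 0 && errno != EEXIST)
                return 1;

        /* O_RDONLY | O_NONBLOCK: open succeeds even with no writer. */
        fd = open(path, O_RDONLY | O_NONBLOCK);
        if (fd >= 0) {
                puts("read-only, non-blocking open: ok");
                close(fd);
        }

        /* O_WRONLY | O_NONBLOCK: fails with ENXIO when nobody reads. */
        if (open(path, O_WRONLY | O_NONBLOCK) < 0 && errno == ENXIO)
                puts("write-only, non-blocking open: ENXIO");

        unlink(path);
        return 0;
}
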
 948 const struct file_operations pipefifo_fops = {
 949         .open           = fifo_open,
 950         .llseek         = no_llseek,
 951         .read_iter      = pipe_read,
 952         .write_iter     = pipe_write,
 953         .poll           = pipe_poll,
 954         .unlocked_ioctl = pipe_ioctl,
 955         .release        = pipe_release,
 956         .fasync         = pipe_fasync,
 957 };
 958 
 959 /*
 960  * Allocate a new array of pipe buffers and copy the info over. Returns the
  961  * pipe size if successful, or -ERROR on error.
 962  */
 963 static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
 964 {
 965         struct pipe_buffer *bufs;
 966 
 967         /*
 968          * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
 969          * expect a lot of shrink+grow operations, just free and allocate
 970          * again like we would do for growing. If the pipe currently
 971          * contains more buffers than arg, then return busy.
 972          */
 973         if (nr_pages < pipe->nrbufs)
 974                 return -EBUSY;
 975 
 976         bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
 977         if (unlikely(!bufs))
 978                 return -ENOMEM;
 979 
 980         /*
 981          * The pipe array wraps around, so just start the new one at zero
 982          * and adjust the indexes.
 983          */
 984         if (pipe->nrbufs) {
 985                 unsigned int tail;
 986                 unsigned int head;
 987 
 988                 tail = pipe->curbuf + pipe->nrbufs;
 989                 if (tail < pipe->buffers)
 990                         tail = 0;
 991                 else
 992                         tail &= (pipe->buffers - 1);
 993 
 994                 head = pipe->nrbufs - tail;
 995                 if (head)
 996                         memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
 997                 if (tail)
 998                         memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
 999         }
1000 
1001         pipe->curbuf = 0;
1002         kfree(pipe->bufs);
1003         pipe->bufs = bufs;
1004         pipe->buffers = nr_pages;
1005         return nr_pages * PAGE_SIZE;
1006 }
1007 
1008 /*
1009  * Currently we rely on the pipe array holding a power-of-2 number
1010  * of pages.
1011  */
1012 static inline unsigned int round_pipe_size(unsigned int size)
1013 {
1014         unsigned long nr_pages;
1015 
1016         nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
1017         return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
1018 }
1019 
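As a worked example (assuming 4 KiB pages): a request of 100000 bytes becomes (100000 + 4095) >> 12 = 25 pages, which roundup_pow_of_two() raises to 32, so the pipe is sized at 131072 bytes and F_GETPIPE_SZ later reports that rounded figure.
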
1020 /*
1021  * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
1022  * will return an error.
1023  */
1024 int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1025                  size_t *lenp, loff_t *ppos)
1026 {
1027         int ret;
1028 
1029         ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
1030         if (ret < 0 || !write)
1031                 return ret;
1032 
1033         pipe_max_size = round_pipe_size(pipe_max_size);
1034         return ret;
1035 }
1036 
1037 /*
1038  * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1039  * location, so checking ->i_pipe is not enough to verify that this is a
1040  * pipe.
1041  */
1042 struct pipe_inode_info *get_pipe_info(struct file *file)
1043 {
1044         return file->f_op == &pipefifo_fops ? file->private_data : NULL;
1045 }
1046 
1047 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1048 {
1049         struct pipe_inode_info *pipe;
1050         long ret;
1051 
1052         pipe = get_pipe_info(file);
1053         if (!pipe)
1054                 return -EBADF;
1055 
1056         __pipe_lock(pipe);
1057 
1058         switch (cmd) {
1059         case F_SETPIPE_SZ: {
1060                 unsigned int size, nr_pages;
1061 
1062                 size = round_pipe_size(arg);
1063                 nr_pages = size >> PAGE_SHIFT;
1064 
1065                 ret = -EINVAL;
1066                 if (!nr_pages)
1067                         goto out;
1068 
1069                 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1070                         ret = -EPERM;
1071                         goto out;
1072                 }
1073                 ret = pipe_set_size(pipe, nr_pages);
1074                 break;
1075                 }
1076         case F_GETPIPE_SZ:
1077                 ret = pipe->buffers * PAGE_SIZE;
1078                 break;
1079         default:
1080                 ret = -EINVAL;
1081                 break;
1082         }
1083 
1084 out:
1085         __pipe_unlock(pipe);
1086         return ret;
1087 }
1088 
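pipe_fcntl() above is reached through fcntl(2). A sketch that grows a pipe and reads back the rounded capacity (illustrative only, not part of fs/pipe.c); with glibc, F_SETPIPE_SZ and F_GETPIPE_SZ require _GNU_SOURCE.

/* Illustrative only: resizing a pipe with fcntl(). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2], newsize;

        if (pipe(fds) < 0)
                return 1;

        /* Ask for 1 MiB; the kernel rounds to a power-of-two page count. */
        newsize = fcntl(fds[1], F_SETPIPE_SZ, 1024 * 1024);
        if (newsize < 0)
                perror("F_SETPIPE_SZ");   /* EPERM if above pipe-max-size */
        else
                printf("pipe resized to %d bytes\n", newsize);

        printf("F_GETPIPE_SZ reports %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));

        close(fds[0]);
        close(fds[1]);
        return 0;
}
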
1089 static const struct super_operations pipefs_ops = {
1090         .destroy_inode = free_inode_nonrcu,
1091         .statfs = simple_statfs,
1092 };
1093 
1094 /*
1095  * pipefs should _never_ be mounted by userland - too much of security hassle,
1096  * no real gain from having the whole whorehouse mounted. So we don't need
1097  * any operations on the root directory. However, we need a non-trivial
1098  * d_name - pipe: will go nicely and kill the special-casing in procfs.
1099  */
1100 static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1101                          int flags, const char *dev_name, void *data)
1102 {
1103         return mount_pseudo(fs_type, "pipe:", &pipefs_ops,
1104                         &pipefs_dentry_operations, PIPEFS_MAGIC);
1105 }
1106 
1107 static struct file_system_type pipe_fs_type = {
1108         .name           = "pipefs",
1109         .mount          = pipefs_mount,
1110         .kill_sb        = kill_anon_super,
1111 };
1112 
1113 static int __init init_pipe_fs(void)
1114 {
1115         int err = register_filesystem(&pipe_fs_type);
1116 
1117         if (!err) {
1118                 pipe_mnt = kern_mount(&pipe_fs_type);
1119                 if (IS_ERR(pipe_mnt)) {
1120                         err = PTR_ERR(pipe_mnt);
1121                         unregister_filesystem(&pipe_fs_type);
1122                 }
1123         }
1124         return err;
1125 }
1126 
1127 fs_initcall(init_pipe_fs);
