本文最后更新于:2020年7月1日 晚上

* 看了一段时间Linux内核源代码,经常会在代码中看到down()、up()、spin_lock()、spin_unlock()、read_lock()、write_lock()、read_unlock()、write_unlock()等函数。本篇先来看down()和up()是干什么的,以及它们的底层是如何实现的……→_→ *

  1. down()(P操作)

    内核中通过信号量(semaphore)来实现进程间对共享资源的互斥访问,提供了down()函数(P操作)和up()函数(V操作)

    • 内核中信号量的数据结构

      //linux-2.4.0\include\asm-i386\semaphore.h
      /*
       * Kernel semaphore: a counter together with the queue of tasks
       * waiting on it.
       */
      struct semaphore {
      	atomic_t count;// counter: the number of available resources
      	int sleepers;// number of waiting tasks (in practice it only records whether any task is waiting)
      	wait_queue_head_t wait;// queue of tasks waiting on this semaphore
      #if WAITQUEUE_DEBUG
      	long __magic;// debug-only field; down()/up() pass it to CHECK_MAGIC()
      #endif
      };
    • 初始化信号量

      /*
       * __SEM_DEBUG_INIT(name): extra initializer element for the debug-only
       * __magic field. Expands to ", (int)&(name).__magic" when
       * WAITQUEUE_DEBUG is enabled and to nothing otherwise.
       */
      #if WAITQUEUE_DEBUG
      # define __SEM_DEBUG_INIT(name) \
      		, (int)&(name).__magic
      #else
      # define __SEM_DEBUG_INIT(name)
      #endif
      
      // Static initializer for a struct semaphore: sets count to the given
      // value, sleepers to 0, and initializes the wait queue head (plus the
      // debug field when WAITQUEUE_DEBUG is enabled).
      #define __SEMAPHORE_INITIALIZER(name,count) \
      { ATOMIC_INIT(count), 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
      	__SEM_DEBUG_INIT(name) }
      
      // Static initializer for a semaphore whose count starts at 1.
      #define __MUTEX_INITIALIZER(name) \
      	__SEMAPHORE_INITIALIZER(name,1)
      
      // Define a struct semaphore variable called `name` with the given
      // initial count.
      #define __DECLARE_SEMAPHORE_GENERIC(name,count) \
      	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
      
      // Declare a semaphore with an initial value of 1.
      #define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
      // Declare a semaphore with an initial value of 0.
      #define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
    • down()

      /*
       * down() - acquire the semaphore (P operation).
       * @sem: semaphore to acquire.
       *
       * Decrements sem->count with a LOCK-prefixed decl. If the result is
       * negative (the js branch is taken), control jumps to out-of-line code
       * emitted into the .text.lock section, which calls __down_failed and
       * then jumps back to label 1. Otherwise execution falls straight
       * through. sem is handed to the asm through the "c" register
       * constraint.
       */
      static inline void down(struct semaphore * sem)
      {
      #if WAITQUEUE_DEBUG
      	CHECK_MAGIC(sem->__magic);
      #endif
      
      	__asm__ __volatile__(
      		"# atomic down operation\n\t"
      		// Lock the bus and decrement count by 1.
      		LOCK "decl %0\n\t"     /* --sem->count */
      		"js 2f\n"
      		"1:\n"// count is >= 0 here: return from down() and enter the critical section
      		".section .text.lock,\"ax\"\n"
      		"2:\tcall __down_failed\n\t"// count is < 0 here: call __down_failed
      		"jmp 1b\n"
      		".previous"
      		:"=m" (sem->count)
      		:"c" (sem)
      		:"memory");
      }
    • __down_failed()中调用了__down()

      /*
       * __down() - slow path of down(): wait until the semaphore can be taken.
       * @sem: semaphore the current task failed to acquire in down().
       *
       * Queues the current task on sem->wait as an exclusive waiter in
       * TASK_UNINTERRUPTIBLE state, then loops under semaphore_lock: it
       * re-checks sem->count and either breaks out (semaphore obtained) or
       * drops the lock and calls schedule(). On exit the task is removed from
       * the wait queue, set back to TASK_RUNNING, and wake_up() is called on
       * the queue.
       */
      void __down(struct semaphore * sem)
      {
      	struct task_struct *tsk = current;
      	DECLARE_WAITQUEUE(wait, tsk);
      	tsk->state = TASK_UNINTERRUPTIBLE;
      	// Link the current task's wait-queue element `wait` onto the tail of the queue headed by sem->wait.
      	add_wait_queue_exclusive(&sem->wait, &wait);
      
      	spin_lock_irq(&semaphore_lock);
      	sem->sleepers++;// one more task is waiting to enter the critical section
      	for (;;) {
      		int sleepers = sem->sleepers;
      
      		/*
      		 * Add "everybody else" into it. They aren't
      		 * playing, because we own the spinlock.
      		 */
      		 // A task runs __down() because it failed to enter the critical section, but another task may have called up() in the meantime, so count is checked once more to avoid sleeping needlessly.
      		 // atomic_add_negative() adds (sleepers - 1) to sem->count.
      		 // A negative result returns non-zero: the task must keep waiting.
      		 // A non-negative result returns zero: no need to wait, the task may enter the critical section.
      		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
      			sem->sleepers = 0;// reset the waiter count to 0
      			break;// leave the loop
      		}
      		sem->sleepers = 1;	/* us - see -1 above */// set the waiter count to 1; here it only says whether any task is waiting, not how many
      		spin_unlock_irq(&semaphore_lock);
      
      		schedule();// give up the CPU; the task sleeps in TASK_UNINTERRUPTIBLE state, i.e. a deep sleep that signals do not wake
      		tsk->state = TASK_UNINTERRUPTIBLE;
      		spin_lock_irq(&semaphore_lock);
      	}
      	spin_unlock_irq(&semaphore_lock);
      	remove_wait_queue(&sem->wait, &wait);// take this task off the wait queue
      	tsk->state = TASK_RUNNING;// mark this task as running
      	wake_up(&sem->wait);// before returning, wake other tasks on the wait queue
      }
  2. up()(V操作)

    • up()

      
      /*
       * up() - release the semaphore (V operation).
       * @sem: semaphore to release.
       *
       * Increments sem->count with a LOCK-prefixed incl. If the result is
       * less than or equal to zero (the jle branch is taken), control jumps
       * to out-of-line code emitted into the .text.lock section, which calls
       * __up_wakeup and then jumps back to label 1. Otherwise execution falls
       * straight through. sem is handed to the asm through the "c" register
       * constraint.
       */
      static inline void up(struct semaphore * sem)
      {
      #if WAITQUEUE_DEBUG
      	CHECK_MAGIC(sem->__magic);
      #endif
      	__asm__ __volatile__(
      		"# atomic up operation\n\t"
      		// Lock the bus and increment count by 1; this pairs with the atomic_add_negative() adjustment made in __down().
      		LOCK "incl %0\n\t"     /* ++sem->count */
      		"jle 2f\n"
      		"1:\n"
      		".section .text.lock,\"ax\"\n"
      		"2:\tcall __up_wakeup\n\t"// count is <= 0 here: call __up_wakeup()
      		"jmp 1b\n"
      		".previous"
      		:"=m" (sem->count)
      		:"c" (sem)
      		:"memory");
      }
    • __up_wakeup()中调用了__up(),__up()中调用了wake_up()

      // wake_up() is a function-like macro that calls __wake_up() on queue x,
      // selecting tasks in TASK_UNINTERRUPTIBLE or TASK_INTERRUPTIBLE state and
      // passing WQ_FLAG_EXCLUSIVE as the wake-queue mode.
      #define wake_up(x)			__wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE)
    • __wake_up()

      // Forwards to __wake_up_common(); note that the last argument (sync) is 0.
      // q:       wait queue head whose waiters should be woken
      // mode:    task-state mask; forwarded unchanged
      // wq_mode: wait-queue flag mask; forwarded unchanged
      void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)
      {
      	__wake_up_common(q, mode, wq_mode, 0);
      }
    • __wake_up_common()

      /*
       * __wake_up_common() - wake tasks waiting on a wait queue.
       * @q:       wait queue head; a NULL q makes the function return at once.
       * @mode:    task-state mask; only entries whose task state has a bit in
       *           common with mode are considered.
       * @wq_mode: wait-queue flag mask, combined with each entry's flags to
       *           detect exclusive waiters (WQ_FLAG_EXCLUSIVE).
       * @sync:    non-zero selects wake_up_process_synchronous(), zero
       *           selects wake_up_process().
       *
       * The whole walk runs with q->lock taken through
       * wq_write_lock_irqsave() and released through
       * wq_write_unlock_irqrestore().
       */
      static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
      				     unsigned int wq_mode, const int sync)
      {
      	struct list_head *tmp, *head;
      	struct task_struct *p, *best_exclusive;
      	unsigned long flags;
      	int best_cpu, irq;
      
      	if (!q)
      		goto out;
      
      	best_cpu = smp_processor_id();
      	irq = in_interrupt();
      	best_exclusive = NULL;
      	wq_write_lock_irqsave(&q->lock, flags);
      
      	head = &q->task_list;
      	tmp = head->next;
      	while (tmp != head) {
      		unsigned int state;
                      wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
      		tmp = tmp->next;	/* advance first; curr is the entry being examined */
      		p = curr->task;
      		state = p->state;
      		if (state & mode) {
      			/*
      			 * If waking up from an interrupt context then
      			 * prefer processes which are affine to this
      			 * CPU.
      			 */
      			 // This function walks the wait queue and wakes, in order, every task that matches; once a woken entry is exclusive (WQ_FLAG_EXCLUSIVE set in both curr->flags and wq_mode) it stops waking the rest. For semaphore waiters, the woken task resumes inside __down().
      			if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) {
      				/* Defer the wakeup: remember the first candidate, or stop at one on this CPU. */
      				if (!best_exclusive)
      					best_exclusive = p;
      				if (p->processor == best_cpu) {
      					best_exclusive = p;
      					break;
      				}
      			} else {
      				if (sync)
      					wake_up_process_synchronous(p);
      				else
      					wake_up_process(p);
      				if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)
      					break;
      			}
      		}
      	}
      	/* Wake the exclusive waiter chosen in the interrupt-context branch, if any. */
      	if (best_exclusive) {
      		if (sync)
      			wake_up_process_synchronous(best_exclusive);
      		else
      			wake_up_process(best_exclusive);
      	}
      	wq_write_unlock_irqrestore(&q->lock, flags);
      out:
      	return;
      }

* 先去吃个饭。。一会来继续写spin_lock()、spin_unlock()。。→_→ *