Linux 0.12中的系统调用。

1. 系统调用_syscalln

在Linux 0.12中,系统调用宏定义有4类,分别为_syscall0、_syscall1、_syscall2、_syscall3(末尾的0-3表示携带的参数个数),定义如下(在linux-0.12/include/unistd.h文件中定义):

#define _syscall0(type,name) \
type name(void) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name)); \
if (__res >= 0) \
    return (type) __res; \
errno = -__res; \
return -1; \
}

#define _syscall1(type,name,atype,a) \
type name(atype a) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name),"b" ((long)(a))); \
if (__res >= 0) \
    return (type) __res; \
errno = -__res; \
return -1; \
}

#define _syscall2(type,name,atype,a,btype,b) \
type name(atype a,btype b) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b))); \
if (__res >= 0) \
    return (type) __res; \
errno = -__res; \
return -1; \
}

#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
    return (type) __res; \
errno=-__res; \
return -1; \
}

1.1 _syscall0

_syscall0定义如下,不带任何参数:

#define _syscall0(type,name) \
type name(void) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res)                      /* 输出寄存器列表 */ \
    : "0" (__NR_##name));               /* 输入寄存器列表 */ \
if (__res >= 0) \
    return (type) __res; \
errno = -__res; \
return -1; \
}
上面用到的GCC扩展内联汇编,其一般格式如下:
asm asm-qualifiers ( AssemblerTemplate 
                      : OutputOperands
                      : InputOperands
                      : Clobbers
                      : GotoLabels)
  • volatile(也可写作__volatile__),告诉编译器不要优化掉(删除)这条内联汇编语句,即使它的输出看起来没有被使用
  • __NR_##name中的##是预处理器的记号连接运算符,表示将左侧和右侧的记号连接在一起,举例,_syscall0(int,fork)展开后为__NR_fork。在Linux 0.12中可以找到这些系统调用的使用,
// linux-0.12/init/main.c
static inline _syscall0(int,fork)
static inline _syscall0(int,pause)
static inline _syscall0(int,sync)

 // linux-0.12/lib/setsid.c
_syscall0(pid_t,setsid)
  • int $0x80 ...,向内核发出一个中断调用int 0x80,开始执行一个系统调用。
    • 输出寄存器列表:"=a" (__res),其中=表示这是输出寄存器,a表示使用寄存器eax;执行完汇编语句后,寄存器eax的值被放入__res
    • 输入寄存器列表:"0" (__NR_##name),其中0表示使用与第0个操作数(即上面的输出寄存器)相同的寄存器,即eax,系统调用功能号通过它传入内核

1.2 _syscall3

以write系统调用为例,

#define __LIBRARY__
#include <unistd.h>

_syscall3(int,write,int,fd,const char *,buf,off_t,count)

_syscall3在linux-0.12/include/unistd.h中定义,

#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
    : "=a" (__res) \
    : "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
    return (type) __res; \
errno=-__res; \
return -1; \
}
  • 输入寄存器列表:b表示使用寄存器ebx,c表示使用寄存器ecx,d表示使用寄存器edx,分别存放第1、2、3个参数

1.3 系统调用功能号

Linux给每个系统调用指派一个唯一的系统调用功能号,在linux-0.12/include/unistd.h定义,以__NR_作为前缀,

#define __NR_setup    0    /* used only by init, to get system going */
#define __NR_exit    1
#define __NR_fork    2
#define __NR_read    3
#define __NR_write    4
#define __NR_open    5
#define __NR_close    6
#define __NR_waitpid    7
#define __NR_creat    8
#define __NR_link    9
#define __NR_unlink    10
#define __NR_execve    11
#define __NR_chdir    12
#define __NR_time    13
#define __NR_mknod    14
#define __NR_chmod    15
#define __NR_chown    16
#define __NR_break    17
#define __NR_stat    18
#define __NR_lseek    19
#define __NR_getpid    20
#define __NR_mount    21
#define __NR_umount    22
#define __NR_setuid    23
#define __NR_getuid    24
#define __NR_stime    25
#define __NR_ptrace    26
#define __NR_alarm    27
#define __NR_fstat    28
#define __NR_pause    29
#define __NR_utime    30
#define __NR_stty    31
#define __NR_gtty    32
#define __NR_access    33
#define __NR_nice    34
#define __NR_ftime    35
#define __NR_sync    36
#define __NR_kill    37
#define __NR_rename    38
#define __NR_mkdir    39
#define __NR_rmdir    40
#define __NR_dup    41
#define __NR_pipe    42
#define __NR_times    43
#define __NR_prof    44
#define __NR_brk    45
#define __NR_setgid    46
#define __NR_getgid    47
#define __NR_signal    48
#define __NR_geteuid    49
#define __NR_getegid    50
#define __NR_acct    51
#define __NR_phys    52
#define __NR_lock    53
#define __NR_ioctl    54
#define __NR_fcntl    55
#define __NR_mpx    56
#define __NR_setpgid    57
#define __NR_ulimit    58
#define __NR_uname    59
#define __NR_umask    60
#define __NR_chroot    61
#define __NR_ustat    62
#define __NR_dup2    63
#define __NR_getppid    64
#define __NR_getpgrp    65
#define __NR_setsid    66
#define __NR_sigaction    67
#define __NR_sgetmask    68
#define __NR_ssetmask    69
#define __NR_setreuid    70
#define __NR_setregid    71
#define __NR_sigsuspend    72
#define __NR_sigpending 73
#define __NR_sethostname 74
#define __NR_setrlimit    75
#define __NR_getrlimit    76
#define __NR_getrusage    77
#define __NR_gettimeofday 78
#define __NR_settimeofday 79
#define __NR_getgroups    80
#define __NR_setgroups    81
#define __NR_select    82
#define __NR_symlink    83
#define __NR_lstat    84
#define __NR_readlink    85
#define __NR_uselib    86

这些系统调用号就是系统调用处理程序指针数组表sys_call_table[]的下标(该数组在linux-0.12/include/linux/sys.h中定义),

extern int sys_setup();
extern int sys_exit();
extern int sys_fork();
extern int sys_read();
extern int sys_write();
extern int sys_open();
extern int sys_close();
extern int sys_waitpid();
extern int sys_creat();
extern int sys_link();
extern int sys_unlink();
extern int sys_execve();
extern int sys_chdir();
extern int sys_time();
extern int sys_mknod();
extern int sys_chmod();
extern int sys_chown();
extern int sys_break();
extern int sys_stat();
extern int sys_lseek();
extern int sys_getpid();
extern int sys_mount();
extern int sys_umount();
extern int sys_setuid();
extern int sys_getuid();
extern int sys_stime();
extern int sys_ptrace();
extern int sys_alarm();
extern int sys_fstat();
extern int sys_pause();
extern int sys_utime();
extern int sys_stty();
extern int sys_gtty();
extern int sys_access();
extern int sys_nice();
extern int sys_ftime();
extern int sys_sync();
extern int sys_kill();
extern int sys_rename();
extern int sys_mkdir();
extern int sys_rmdir();
extern int sys_dup();
extern int sys_pipe();
extern int sys_times();
extern int sys_prof();
extern int sys_brk();
extern int sys_setgid();
extern int sys_getgid();
extern int sys_signal();
extern int sys_geteuid();
extern int sys_getegid();
extern int sys_acct();
extern int sys_phys();
extern int sys_lock();
extern int sys_ioctl();
extern int sys_fcntl();
extern int sys_mpx();
extern int sys_setpgid();
extern int sys_ulimit();
extern int sys_uname();
extern int sys_umask();
extern int sys_chroot();
extern int sys_ustat();
extern int sys_dup2();
extern int sys_getppid();
extern int sys_getpgrp();
extern int sys_setsid();
extern int sys_sigaction();
extern int sys_sgetmask();
extern int sys_ssetmask();
extern int sys_setreuid();
extern int sys_setregid();
extern int sys_sigpending();
extern int sys_sigsuspend();
extern int sys_sethostname();
extern int sys_setrlimit();
extern int sys_getrlimit();
extern int sys_getrusage();
extern int sys_gettimeofday();
extern int sys_settimeofday();
extern int sys_getgroups();
extern int sys_setgroups();
extern int sys_select();
extern int sys_symlink();
extern int sys_lstat();
extern int sys_readlink();
extern int sys_uselib();

fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
sys_setreuid,sys_setregid, sys_sigsuspend, sys_sigpending, sys_sethostname,
sys_setrlimit, sys_getrlimit, sys_getrusage, sys_gettimeofday, 
sys_settimeofday, sys_getgroups, sys_setgroups, sys_select, sys_symlink,
sys_lstat, sys_readlink, sys_uselib };

fn_ptr实为函数指针类型,在linux-0.12/include/linux/sched.h中定义:typedef int (*fn_ptr)();

2. 系统调用处理

系统调用处理程序,linux-0.12/kernel/sys_call.s

_system_call:
    push %ds
    push %es
    push %fs
    pushl %eax        # save the orig_eax
    pushl %edx        
    pushl %ecx        # push %ebx,%ecx,%edx as parameters
    pushl %ebx        # to the system call
    movl $0x10,%edx        # set up ds,es to kernel space
    mov %dx,%ds
    mov %dx,%es
    movl $0x17,%edx        # fs points to local data space
    mov %dx,%fs
    cmpl _NR_syscalls,%eax
    jae bad_sys_call
    call _sys_call_table(,%eax,4)
    pushl %eax

call _sys_call_table(,%eax,4)是一条间接调用指令,实现程序跳转:从地址_sys_call_table + %eax * 4处取出函数指针(每个表项占4字节)并调用它,其中eax存放的是系统调用功能号。

系统中断调用处理流程如下(图片来源于赵炯《Linux内核完全剖析——基于0.12内核》):

image-20230413172442654

2.1 系统调用返回后

从系统调用返回,对信号进行识别处理,处理后才退出中断过程。P286

ret_from_sys_call:
    movl _current,%eax
    cmpl _task,%eax            # task[0] cannot have signals
    je 3f
    cmpw $0x0f,CS(%esp)        # was old code segment supervisor ?
    jne 3f
    cmpw $0x17,OLDSS(%esp)        # was stack segment = 0x17 ?
    jne 3f
    movl signal(%eax),%ebx
    movl blocked(%eax),%ecx
    notl %ecx
    andl %ebx,%ecx
    bsfl %ecx,%ecx
    je 3f
    btrl %ecx,%ebx
    movl %ebx,signal(%eax)
    incl %ecx
    pushl %ecx
    call _do_signal
    popl %ecx
    testl %eax, %eax
    jne 2b        # see if we need to switch tasks, or do more signals
本文系Spark & Shine原创,转载需注明出处。本文最近一次修改时间:2023-04-13 22:41

results matching ""

    No results matching ""