本文共 8045 字,大约阅读时间需要 26 分钟。
为什么要将系统调用定义成宏SYSCALL_DEFINEx?原因在于CVE-2009-0029、CVE-2010-3301这类漏洞: 在Linux 2.6.28及以前版本的内核中,用户态把32位参数放在64位寄存器中传给系统调用时,内核无法保证该参数已被正确地符号扩展,可能导致系统崩溃或提权漏洞。
内核开发者通过将系统调用的所有输入参数都先转化成long类型(64位),再强制转化到相应的类型来规避这个漏洞。// 定义位置:\include\linux\syscalls.h#define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long sys_##sname(void)#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)#define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)#define __PROTECT(...) asmlinkage_protect(__VA_ARGS__) #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(SyS##name)))); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
“##”是连接符,__VA_ARGS__代表前面“…”里面的可变参数
SYSCALL_DEFINEx里面的x代表的是系统调用的参数个数。socket是我们常用的函数,其对应的系统调用在\net\socket.c中定义,函数部分如下:
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol){ int retval; struct socket *sock; int flags; /* Check the SOCK_* constants for consistency. */ BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); ...}
__SYSCALL_DEFINEx(3, _socket, int, family, int, type, int, protocol) \
由于 SYSCALL_DEFINEx 调用了 SYSCALL_METADATA 和 __SYSCALL_DEFINEx
由于 SYSCALL_METADATA 是跟踪系统调用的, 因此只关注 __SYSCALL_DEFINEx, 其展开如下:
asmlinkage long sys_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)) \ __attribute__((alias(__stringify(SyS_socket)))); \ static inline long SYSC_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)); \ asmlinkage long SyS_socket(__MAP(x,__SC_LONG,int, family, int, type, int, protocol)); \ asmlinkage long SyS_socket(__MAP(x,__SC_LONG,int, family, int, type, int, protocol)) \ { \ long ret = SYSC_socket(__MAP(x,__SC_CAST,int, family, int, type, int, protocol)); \ __MAP(x,__SC_TEST,int, family, int, type, int, protocol); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,int, family, int, type, int, protocol)); \ return ret; \ } \ static inline long SYSC_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol))
第一行的sys_socket只是函数声明,最后一行SYSC_socket才是函数定义,因为其末尾没有分号,再加上SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)之后的函数体,明确无误是真正的socket系统调用函数定义
SyS_socket里面又调用到了SYSC_socket了,为何要使用宏定义这样绕来绕去的? 关键在于__MAP、__SC_DECL、__SC_LONG、__SC_CAST、__SC_TEST、__SC_ARGS这几个宏。// 定义位置:\include\linux\syscalls.h#define __MAP0(m,...)#define __MAP1(m,t,a) m(t,a)#define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__)#define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__)#define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__)#define __MAP5(m,t,a,...) m(t,a), __MAP4(m,__VA_ARGS__)#define __MAP6(m,t,a,...) m(t,a), __MAP5(m,__VA_ARGS__)#define __MAP(n,...) __MAP##n(__VA_ARGS__)#define __SC_DECL(t, a) t a#define __TYPE_IS_L(t) (__same_type((t)0, 0L))#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL))// t为long long或unsigned long long 返回真,t为int或long则返回假#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL))#define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a#define __SC_CAST(t, a) (t) a#define __SC_ARGS(t, a) a#define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
// 定义位置:\include\linux\bug.h
/* 其中当e为非0的时候,经过两次非操作得到的结果是1,加上负号就是-1,struct的位字段宽度不允许为负,这样在编译的时候就会报错;当e=0的时候,经过两次非操作仍然是0,-0的结果还是0,编译可以通过。这样可以在编译的时候发现错误 */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
__MAP(x,__SC_DECL,int, family, int, type, int, protocol)宏展开后: int family, int type,int protocol
__MAP3(__SC_DECL,int, family, int, type, int, protocol) ==> __SC_DECL(int, family),__MAP2(__SC_DECL,int, type, int, protocol) ==>__SC_DECL(int, family),__SC_DECL(int, type),__SC_DECL(int, protocol) ==> int family,int type,int protocol
__MAP(x,__SC_LONG,int, family, int, type, int, protocol)宏展开后: long family, long type,long protocol
__MAP3(__SC_LONG,int, family, int, type, int, protocol) ==> __SC_LONG(int, family),__SC_LONG(int, type),__SC_LONG(int, protocol) ==> long family, long type,long protocol
其中__typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L))的意思是: 如果t是long long类型就得到long long,否则得到long类型(在64位系统上为64bit)
__MAP(x,__SC_CAST,int, family, int, type, int, protocol)宏展开后: (int) family, (int) type,(int) protocol
__MAP(x,__SC_CAST,int, family, int, type, int, protocol) ==> __SC_CAST(int, family),__SC_CAST(int, type),__SC_CAST(int, protocol) ==> (int) family, (int) type,(int) protocol
__MAP(x,__SC_TEST,int, family, int, type, int, protocol)宏展开后: (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0)
__MAP(x,__SC_TEST,int, family, int, type, int, protocol) ==> __SC_TEST(int, family),__SC_TEST(int, type),__SC_TEST(int, protocol) ==> (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0)
__MAP(x,__SC_ARGS,int, family, int, type, int, protocol)宏展开后: family, type, protocol
==> __SC_ARGS(int, family),__SC_ARGS(int, type),__SC_ARGS(int, protocol) ==> family, type, protocol
__PROTECT(x, ret,__MAP(x,__SC_ARGS,int, family, int, type, int, protocol))宏展开后: asmlinkage_protect(x, ret, family, type, protocol)
asmlinkage_protect(x, ret, family, type, protocol) ==> __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), "m" (family), "m" (type), "m" (protocol));
asmlinkage_protect的主要作用是防止编译器的优化(例如尾调用优化)破坏栈上仍属于调用者的系统调用参数: 它让返回值ret和各个参数在return之前一直保持“活跃”,因此其后必须紧跟return ret,详情如下:
asmlinkage long sys_socket(__MAP(x,__SC_DECL,int, family, int, type, int, protocol)) \ __attribute__((alias(__stringify(SyS_socket)))); \
扩展为:
asmlinkage long sys_socket(int family,int type,int protocol) __attribute__((alias("SyS_socket"))); // 将sys_socket设置为SyS_socket函数的别名,调用sys_socket就是调用SyS_socket
一些说明:
// 定义位置:\include\linux\stringify.h
#define __stringify_1(x...) #x // #的作用是将宏参数字符串化,如:#do => "do"
#define __stringify(x...) __stringify_1(x)
因此:__stringify(SyS_socket) ==> “SyS_socket”
最后,完整的扩展为:
#define __SYSCALL_DEFINEx(3, _socket, int, family, int, type, int, protocol) \ asmlinkage long sys_socket(int family,int type,int protocol) __attribute__((alias("SyS_socket"))); \ static inline long SYSC_socket(int family,int type,int protocol); \ asmlinkage long SyS_socket(long family, long type,long protocol); \ asmlinkage long SyS_socket(long family, long type,long protocol) \ { \ long ret = SYSC_socket((int) family, (int) type,(int) protocol); \ (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0), (void)BUILD_BUG_ON_ZERO(0); \ __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), "m" (family), "m" (type), "m" (protocol)); \ return ret; \ } \ static inline long SYSC_socket(int family,int type,int protocol) { int retval; struct socket *sock; int flags; /* Check the SOCK_* constants for consistency. */ BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); ...}
调用sys_socket函数时,其实是调用它的别名SyS_socket函数;SyS_socket函数把long类型的参数强制转换回int后,再调用SYSC_socket函数(真正执行socket系统调用的代码),最后经asmlinkage_protect处理后将返回值ret返回
转载地址:http://kwerb.baihongyu.com/