insmod 入口函數
本文用到的 busybox 版本為 1.34.1,Linux 內核版本為 4.14.294
insmod_main()
函數是 insmod 命令的入口函數,該函數首先通過函數參數獲取被加載模塊的名字并存入局部指針變量 filename,然后調用bb_init_module()
函數進行后續操作。
/*
 * Entry point of the insmod applet.
 *
 * The first remaining argument is taken as the module file name; the
 * arguments after it are folded into a single option string by
 * parse_cmdline_module_options(). Both are handed to bb_init_module().
 *
 * Returns the value produced by bb_init_module(); a nonzero value is
 * treated as failure and reported through bb_error_msg().
 */
int insmod_main(int argc UNUSED_PARAM, char **argv)
{
	char *filename;
	char *options;
	int rc;

	/*
	 * Compatibility:
	 *  - 2.6-style insmod accepts no options and needs a real file name
	 *    (the ".ko" suffix cannot be left out).
	 *  - 2.4-style insmod accepts a bare module name without ".o" and
	 *    searches the default directories or $MODPATH.
	 */
	IF_FEATURE_2_4_MODULES(
		getopt32(argv, INSMOD_OPTS INSMOD_ARGS);
		argv += optind - 1;
	);

	/* Step onto the module file name. */
	argv++;
	filename = *argv;
	if (filename == NULL)
		bb_show_usage();

	/* The option parser skips argv[0] itself, i.e. the file name. */
	options = parse_cmdline_module_options(argv, /*quote_spaces:*/ 0);
	rc = bb_init_module(filename, options);
	if (rc != 0)
		bb_error_msg("can't insert '%s': %s", filename, moderror(rc));

	return rc;
}
模塊參數解析函數
parse_cmdline_module_options()
函數會解析模塊加載時傳給模塊的參數,通過while
循環挨個解析模塊后面傳給模塊的參數,并將解析出來的參數值val
存入指針變量options
指向的內存空間,最后返回該內存空間的首地址。
/*
 * Join the module parameters found after argv[0] into one string.
 *
 * argv:         NULL-terminated vector; argv[0] is skipped, every later
 *               entry is appended followed by a single space.
 * quote_spaces: when nonzero, an entry of the form var=val whose value
 *               contains a space is emitted as var="val".
 *
 * Returns a buffer obtained from xzalloc()/xrealloc(). With no parameters
 * it is an empty string. The space after the last entry is left in place.
 */
char* FAST_FUNC parse_cmdline_module_options(char **argv, int quote_spaces)
{
	int optlen = 0;
	char *options = xzalloc(1);

	while (*++argv != NULL) {
		const char *var = *argv;
		/* Points at '=' if present, otherwise at the terminating NUL. */
		const char *val = strchrnul(var, '=');
		const char *fmt = "%.*s%s ";

		/* Room for the entry plus two quotes, a space and the NUL. */
		options = xrealloc(options, optlen + 2 + strlen(var) + 2);

		/*
		 * modprobe (module-init-tools 3.11.1) compatibility: only the
		 * value is quoted -- var="val with spaces" -- and the variable
		 * name is not inspected for spaces at all.
		 */
		if (quote_spaces && *val != '\0') {
			/* var=val form: move past '=' so it prints with the name. */
			val++;
			if (strchr(val, ' ') != NULL)
				fmt = "%.*s\"%s\" ";
		}

		/* First (val - var) bytes of var, then the rest starting at val. */
		optlen += sprintf(options + optlen, fmt, (int)(val - var), var, val);
	}

	return options;
}
映射模塊文件
bb_init_module()
函數首先判斷模塊有沒有參數傳入,調用try_to_mmap_module()
函數完成后續映射工作,該函數接收兩個參數:被加載模塊的名字(filename),以及模塊大小(image_size)的地址——傳入時表示允許的最大文件大小,返回時被改寫為模塊的實際大小。最后調用init_module()
函數,init_module()
函數是系統調用函數,對應的內核函數是sys_init_module()
函數,進入到內核空間。傳入的參數分別是:模塊內存空間首地址(image),模塊大小(image_size),模塊參數內存空間首地址(options)。
/*
 * Load the module stored in `filename` into the kernel.
 *
 * filename: path of the module file.
 * options:  parameter string handed to the kernel; NULL is treated as "".
 *
 * Returns 0 on success. If the module image cannot be read, returns
 * -errno. Otherwise returns the errno value observed after init_module().
 */
int FAST_FUNC bb_init_module(const char *filename, const char *options)
{
	size_t image_size;
	char *image;
	int rc;
	bool mmaped;

	if (options == NULL)
		options = "";

//TODO: audit bb_init_module_24 to match error code convention
#if ENABLE_FEATURE_2_4_MODULES
	if (get_linux_version_code() < KERNEL_VERSION(2,6,0))
		return bb_init_module_24(filename, options);
#endif

	/*
	 * Prefer finit_module when it exists: some kernels only accept
	 * modules loaded from secure storage (e.g. a read-only rootfs), and
	 * that requires the fd-based call. Any failure falls through to the
	 * classic path below, which also handles compressed modules.
	 */
# ifdef __NR_finit_module
	{
		int fd = open(filename, O_RDONLY | O_CLOEXEC);
		if (fd >= 0) {
			rc = (finit_module(fd, options, 0) != 0);
			close(fd);
			if (!rc)
				return 0;
		}
	}
# endif

	/* Upper bound on the size we are willing to mmap. */
	image_size = INT_MAX - 4095;
	image = try_to_mmap_module(filename, &image_size);
	mmaped = (image != NULL);
	if (!mmaped) {
		/* Default error; open/read failures below may replace it. */
		errno = ENOMEM;
		image = xmalloc_open_zipped_read_close(filename, &image_size);
		if (image == NULL)
			return -errno;
	}

	/* The result is taken from errno, so clear it before the call. */
	errno = 0;
	init_module(image, image_size, options);
	rc = errno;

	/* Release the image the same way it was obtained. */
	if (mmaped)
		munmap(image, image_size);
	else
		free(image);

	return rc;
}
try_to_mmap_module()
函數首先打開模塊文件獲取模塊文件描述符fd
,然后通過fstat()
函數獲取模塊文件的詳細信息,判斷模塊文件的大小st_size
是否超過了設定的文件最大值,調用mmap_read()
函數以只讀的方式將模塊文件的內容映射進內存空間,并返回該內存空間的首地址,通過*(uint32_t*)image != SWAP_BE32(0x7f454C46)
檢查模塊文件是否符合 ELF 標準格式,最后將內存空間的首地址image
返回。通過try_to_mmap_module()
函數我們就獲取了模塊文件內容在內存空間的地址。
/*
 * Try to map an uncompressed module file read-only into memory.
 *
 * filename:     path of the module file (opened with xopen()).
 * image_size_p: in/out. On entry, the largest file size the caller accepts.
 *               On success it is overwritten with the mapped size. It is
 *               left untouched on failure.
 *
 * Returns the mapping address, or NULL when the file cannot be stat'ed,
 * is larger than the limit, cannot be mapped, or does not start with the
 * ELF magic. On success the caller must munmap() the result.
 */
void* FAST_FUNC try_to_mmap_module(const char *filename, size_t *image_size_p)
{
	/* We have user reports of failure to load 3MB module
	 * on a 16MB RAM machine. Apparently even a transient
	 * memory spike to 6MB during module load
	 * is too big for that system. */
	void *image = NULL;
	struct stat st;
	int fd;

	fd = xopen(filename, O_RDONLY);

	/*
	 * If fstat() fails, st is not filled in, so st.st_size must not be
	 * read. Treat that as "cannot mmap" and let the caller fall back.
	 */
	/* st.st_size is off_t, we can't just pass it to mmap */
	if (fstat(fd, &st) == 0 && st.st_size <= *image_size_p) {
		size_t image_size = st.st_size;

		image = mmap_read(fd, image_size);
		if (image == MAP_FAILED) {
			image = NULL;
		} else if (*(uint32_t*)image != SWAP_BE32(0x7f454C46)) {
			/* No ELF signature. Compressed module? */
			munmap(image, image_size);
			image = NULL;
		} else {
			/* Success. Report the size */
			*image_size_p = image_size;
		}
	}

	close(fd);
	return image;
}
從init_module()
開始調用關系會進入到 Linux 內核源碼。
init_module()
其實是一個宏定義,最終會調用到__NR_init_module
系統調用號對應的系統調用函數sys_init_module()
,該對應關系位于 Linux 內核源碼include/uapi/asm-generic/unistd.h
文件中。關于 Linux 系統調用的知識,后面會專門寫個文章分析 Linux 系統調用的實現機制,并手寫一個內核沒有的系統調用。
/* Wrapper macro: a call to init_module() expands to a raw syscall()
 * invocation that uses the __NR_init_module syscall number. */
#define init_module(mod, len, opts) syscall(__NR_init_module, mod, len, opts)
/* Syscall number for init_module. */
#define __NR_init_module 105
/* Associates that number with sys_init_module; the surrounding text places
 * this mapping in include/uapi/asm-generic/unistd.h. */
__SYSCALL(__NR_init_module, sys_init_module)
而sys_init_module()
函數是由宏定義SYSCALL_DEFINE3
展開形成的,該定義位于文件include/linux/syscalls.h
中
/*
 * SYSCALL_DEFINE3(name, ...) forwards to SYSCALL_DEFINEx with an argument
 * count of 3 and the name prefixed by an underscore.
 *
 * SYSCALL_DEFINEx emits SYSCALL_METADATA(...) followed by
 * __SYSCALL_DEFINEx(...).
 *
 * __SYSCALL_DEFINEx generates, for a given `name`:
 *   - sys<name>:  declared with the __SC_DECL argument list and made an
 *                 alias of SyS<name>;
 *   - SyS<name>:  takes the __SC_LONG argument list, calls SYSC<name> with
 *                 the __SC_CAST arguments, and returns its result;
 *   - SYSC<name>: static inline. The macro ends on its function header, so
 *                 the braces written after the macro invocation become its
 *                 body.
 *
 * Each continuation line must end in exactly one backslash.
 */
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

#define SYSCALL_DEFINEx(x, sname, ...)				\
	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

#define __SYSCALL_DEFINEx(x, name, ...)					\
	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
		__attribute__((alias(__stringify(SyS##name))));		\
	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
	{								\
		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
		__MAP(x,__SC_TEST,__VA_ARGS__);				\
		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
		return ret;						\
	}								\
	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
SYSCALL_DEFINE3
的實現位于kernel/module.c
文件中,該函數首先調用may_init_module()
函數判斷用戶是否有加載模塊的權限,調用copy_module_from_user()
函數將模塊文件的內容從用戶空間內存地址拷貝到內核空間內存地址,具體實現后面會分析,最后調用load_module()
函數,細節詳見下面分析。
/*
 * init_module system call.
 *
 * umod:  user-space address of the module image.
 * len:   size of that image in bytes.
 * uargs: user-space address of the module parameter string.
 *
 * Checks permission with may_init_module(), copies the image into `info`
 * with copy_module_from_user(), then passes it to load_module() with
 * flags 0. Returns the first nonzero error from the two preparatory steps,
 * otherwise the result of load_module().
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	int err;
	struct load_info info = { };

	err = may_init_module();
	if (err)
		return err;

	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
		 umod, len, uargs);

	err = copy_module_from_user(umod, len, &info);
	if (err)
		return err;

	return load_module(&info, uargs, 0);
}
copy_module_from_user()
函數首先給load_info
結構體成員info->len
賦值為模塊大小len
,調用__vmalloc()
函數在內核空間為模塊分配info->len
大小的內存空間,并返回內核內存空間的起始地址info->hdr
,最后調用copy_chunked_from_user()
函數(其實就是copy_from_user()
函數)將用戶空間內存模塊文件內容拷貝到info->hdr
所指向的內核空間內存地址
/*
 * Copy a module image from user space into a freshly allocated buffer.
 *
 * umod: user-space address of the module image.
 * len:  size of the image in bytes.
 * info: receives the size in info->len and the buffer in info->hdr.
 *
 * Returns 0 on success, or:
 *   -ENOEXEC if len is smaller than the object info->hdr points to;
 *   the error from security_kernel_read_file() if it rejects the read;
 *   -ENOMEM  if the buffer cannot be allocated;
 *   -EFAULT  if the copy from user space fails (the buffer is freed first).
 */
static int copy_module_from_user(const void __user *umod, unsigned long len,
				 struct load_info *info)
{
	int err;

	info->len = len;
	if (info->len < sizeof(*(info->hdr)))
		return -ENOEXEC;

	err = security_kernel_read_file(NULL, READING_MODULE);
	if (err)
		return err;

	/* Suck in entire file: we'll want most of it. */
	info->hdr = __vmalloc(info->len,
			GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
	if (!info->hdr)
		return -ENOMEM;

	if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) {
		vfree(info->hdr);
		return -EFAULT;
	}

	return 0;
}
至此,模塊文件已經從用戶空間拷貝到內核空間。
模塊加載
鑒于模塊加載函數load_module()
比較復雜,限于篇幅,具體的加載過程會在《Linux內核模塊加載深度剖析(中篇)》一文中分析。
static int load_module(struct load_info *info, const char __user *uargs,
int flags)
{
struct module *mod;
long err;
char *after_dashes;
err = module_sig_check(info, flags);
if (err)
goto free_copy;
err = elf_header_check(info);
if (err)
goto free_copy;
/* Figure out module layout, and allocate all the memory. */
mod = layout_and_allocate(info, flags);
if (IS_ERR(mod)) {
err = PTR_ERR(mod);
goto free_copy;
}
audit_log_kern_module(mod- >name);
/* Reserve our place in the list. */
err = add_unformed_module(mod);
if (err)
goto free_module;
#ifdef CONFIG_MODULE_SIG
mod- >sig_ok = info- >sig_ok;
if (!mod- >sig_ok) {
pr_notice_once("%s: module verification failed: signature "
"and/or required key missing - tainting "
"kernel\\n", mod- >name);
add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK);
}
#endif
/* To avoid stressing percpu allocator, do this once we're unique. */
err = percpu_modalloc(mod, info);
if (err)
goto unlink_mod;
/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
goto unlink_mod;
init_param_lock(mod);
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
if (err)
goto free_unload;
err = check_module_license_and_versions(mod);
if (err)
goto free_unload;
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, info);
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, info);
if (err < 0)
goto free_modinfo;
err = apply_relocations(mod, info);
if (err < 0)
goto free_modinfo;
err = post_relocation(mod, info);
if (err < 0)
goto free_modinfo;
flush_module_icache(mod);
/* Now copy in args */
mod- >args = strndup_user(uargs, ~0UL > > 1);
if (IS_ERR(mod- >args)) {
err = PTR_ERR(mod- >args);
goto free_arch_cleanup;
}
dynamic_debug_setup(mod, info- >debug, info- >num_debug);
/* Ftrace init must be called in the MODULE_STATE_UNFORMED state */
ftrace_module_init(mod);
/* Finally it's fully formed, ready to start executing. */
err = complete_formation(mod, info);
if (err)
goto ddebug_cleanup;
err = prepare_coming_module(mod);
if (err)
goto bug_cleanup;
/* Module is ready to execute: parsing args may do that. */
after_dashes = parse_args(mod- >name, mod- >args, mod- >kp, mod- >num_kp,
-32768, 32767, mod,
unknown_module_param_cb);
if (IS_ERR(after_dashes)) {
err = PTR_ERR(after_dashes);
goto coming_cleanup;
} else if (after_dashes) {
pr_warn("%s: parameters '%s' after `--' ignored\\n",
mod- >name, after_dashes);
}
/* Link in to sysfs. */
err = mod_sysfs_setup(mod, info, mod- >kp, mod- >num_kp);
if (err < 0)
goto coming_cleanup;
if (is_livepatch_module(mod)) {
err = copy_module_elf(mod, info);
if (err < 0)
goto sysfs_cleanup;
}
/* Get rid of temporary copy. */
free_copy(info);
/* Done! */
trace_module_load(mod);
return do_init_module(mod);
sysfs_cleanup:
mod_sysfs_teardown(mod);
coming_cleanup:
mod- >state = MODULE_STATE_GOING;
destroy_params(mod- >kp, mod- >num_kp);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
klp_module_going(mod);
bug_cleanup:
mod- >state = MODULE_STATE_GOING;
/* module_bug_cleanup needs module_mutex protection */
mutex_lock(&module_mutex);
module_bug_cleanup(mod);
mutex_unlock(&module_mutex);
/* we can't deallocate the module until we clear memory protection */
module_disable_ro(mod);
module_disable_nx(mod);
ddebug_cleanup:
dynamic_debug_remove(mod, info- >debug);
synchronize_sched();
kfree(mod- >args);
free_arch_cleanup:
module_arch_cleanup(mod);
free_modinfo:
free_modinfo(mod);
free_unload:
module_unload_free(mod);
unlink_mod:
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod- >list);
mod_tree_remove(mod);
wake_up_all(&module_wq);
/* Wait for RCU-sched synchronizing before releasing mod- >list. */
synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/*
* Ftrace needs to clean up what it initialized.
* This does nothing if ftrace_module_init() wasn't called,
* but it must be called outside of module_mutex.
*/
ftrace_release_mod(mod);
/* Free lock-classes; relies on the preceding sync_rcu() */
lockdep_free_key_range(