上一篇分析了基于内核的虚拟机(KVM):它是一种内建于Linux的开源虚拟化技术,每个虚拟资源(虚拟机)可以表示为虚拟用户,kvm通过获取影子(虚拟)物理位,模拟出专用的寄存器,及页回收、用户统计等功能。本篇分析一款主要基于amd、海光芯片的硬件虚拟化技术SVM,全名为安全虚拟机(Secure Virtual Machine,即AMD-V)。注意:它与机器学习中的“支持向量机(support vector machine)”只是缩写相同,两者没有任何关系。
SVM是AMD处理器(以及与之兼容的海光处理器)提供的硬件辅助虚拟化扩展;Intel处理器上对应的技术是VT-x(VMX),二者并不通用。这种硬件虚拟化技术可以提高虚拟机的运行效率,但需要先在BIOS/UEFI中启用,依赖它的软件(例如KVM)才能使用。
“SVM模式(SVM Mode)”是BIOS/UEFI中的一个开关,用于允许AMD处理器使用SVM指令集(VMRUN、VMLOAD、VMSAVE等)。只有启用该选项后,KVM等虚拟化软件才能在AMD和海光处理器上使用硬件辅助虚拟化。
svm通过创建kvm,注册svm_init_ops初始操作结构,完成硬件的一系列初始化及包含功能操作结构的访问方式。
svm_init_ops结构包括:
has_svm 检查amd 或 海光芯片是否支持svm
is_disabled 检查是否禁用功能
svm_hardware_setup svm硬件相关设置
svm_check_processor_compat 检查处理器兼容性
svm_x86_ops svm(x86)操作结构
amd_pmu_ops amd性能监控单元操作
svm硬件相关设置:
计算待分配页的阶(order,即2^order个连续页),如果启用了NX大页面缓解,则影子分页和NPT都需要NX,分配页,获取页面映射的虚拟地址,填充内存(0xFF,每字节),获取direct_access_msrs(可直接访问的MSR,即模型特定寄存器)列表中的成员,经过偏移运算后写入msrpm_offsets[MSRPM_OFFSETS]列表,将MSR_TSC_AUX登记到kvm_uret_msrs_list列表并记录其槽位,检查PAUSE过滤支持,包括PAUSE过滤拦截(pause filter)、PAUSE过滤阈值(pause filter threshold),KVM的MMU本身不支持使用2级分页,因此如果主机使用2级分页,则不支持NPT,因为主机CR4在VMRUN上保持不变,强制VM NPT级别等于主机的分页级别,设置shadow_me_value和shadow_me_mask,kvm内存管理单元设置掩码和值,sev硬件设置…,svm_hv硬件设置,设置刷新tlb相关函数,SVM每CPU数据初始化…,avic硬件设置(包括LBR虚拟化支持),用于IOMMU驱动调用唤醒vcpu任务,svm设置cpu(寄存器)功能等等。
1. 函数分析
1.1 svm_init
2. 源码结构
3. 部分结构定义
4. 扩展函数/变量
svm特征的kvm初始化
svm 安全虚拟机(Secure Virtual Machine)
/*
 * Module entry point for the SVM flavour of KVM.
 *
 * Runs the compile-time structure size checks and then registers the SVM
 * implementation by passing svm_init_ops, plus the size and alignment of
 * struct vcpu_svm, to kvm_init().
 *
 * Return: whatever kvm_init() returns.
 */
static int __init svm_init(void)
{
	/*
	 * Compile-time layout checks: VMCB save area, GHCB save area,
	 * SEV-ES save area, VMCB control area and the GHCB itself.
	 */
	__unused_size_checks();

	/* Register SVM with the common KVM code. */
	return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}
__unused_size_checks
svm_init_ops
svm_init_ops svm初始操作
/*
 * Init-time hooks that svm_init() hands to kvm_init().
 */
static struct kvm_x86_init_ops svm_init_ops __initdata = {
	/* Does this AMD/Hygon CPU support SVM? */
	.cpu_has_kvm_support = has_svm,

	/*
	 * Is SVM disabled?  Per the original annotation this checks bits in
	 * the VM_CR MSR (MSR_VM_CR, 0xc0010114).
	 */
	.disabled_by_bios = is_disabled,

	/* One-time hardware related setup. */
	.hardware_setup = svm_hardware_setup,

	/* Processor compatibility check. */
	.check_processor_compatibility = svm_check_processor_compat,

	/* Run-time x86 operations table. */
	.runtime_ops = &svm_x86_ops,

	/* AMD performance monitoring unit (PMU) operations. */
	.pmu_ops = &amd_pmu_ops,
};
svm_hardware_setup
amd_pmu_ops
amd_pmu_ops amd性能监控单元操作
pmu 性能监控单元(performance monitoring unit)
/*
 * AMD implementation of KVM's PMU (performance monitoring unit) operations.
 */
struct kvm_pmu_ops amd_pmu_ops __initdata = {
	/* Is the requested hardware event available? */
	.hw_event_available = amd_hw_event_available,

	/*
	 * Checks whether a PMC is enabled by comparing it against the
	 * global_ctrl bits.  Per the original annotation, AMD CPUs have no
	 * global_ctrl MSR, so every PMC is reported as enabled.
	 */
	.pmc_is_enabled = amd_pmc_is_enabled,

	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
	.rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = amd_msr_idx_to_pmc,
	.is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
	.is_valid_msr = amd_is_valid_msr,
	.get_msr = amd_pmu_get_msr,
	.set_msr = amd_pmu_set_msr,
	.refresh = amd_pmu_refresh,
	.init = amd_pmu_init,
	.reset = amd_pmu_reset,
};
svm_direct_access_msrs svm模式特征寄存器访问通道
static const struct svm_direct_access_msrs {u32 index; /* MSR索引 */bool always; /* 如果最初清除拦截,则为true */
} direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {{ .index = MSR_STAR, .always = true }, // 传统模式SYSCALL目标{ .index = MSR_IA32_SYSENTER_CS, .always = true }, // cs{ .index = MSR_IA32_SYSENTER_EIP, .always = false }, // eip{ .index = MSR_IA32_SYSENTER_ESP, .always = false }, // esp
#ifdef CONFIG_X86_64{ .index = MSR_GS_BASE, .always = true }, // 64位GS基地址{ .index = MSR_FS_BASE, .always = true }, // 64位FS基地址 { .index = MSR_KERNEL_GS_BASE, .always = true }, // SwapGS GS影子{ .index = MSR_LSTAR, .always = true }, // 长模式SYSCALL目标{ .index = MSR_CSTAR, .always = true }, // 兼容模式SYSCALL目标{ .index = MSR_SYSCALL_MASK, .always = true }, // 系统调用的EFLAGS掩码
#endif{ .index = MSR_IA32_SPEC_CTRL, .always = false }, // 预测控制{ .index = MSR_IA32_PRED_CMD, .always = false }, // 预测命令{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, // 最后分支预测{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, // 最后分支跳转{ .index = MSR_IA32_LASTINTFROMIP, .always = false }, // 最后分支中断(预测){ .index = MSR_IA32_LASTINTTOIP, .always = false }, // 最后分支中断(执行){ .index = MSR_EFER, .always = false }, // 读取扩展功能寄存器,32 or 64位{ .index = MSR_IA32_CR_PAT, .always = false }, // 页属性表控制寄存器{ .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, // 安全加密虚拟化状态 用户(来宾)虚拟层通信块{ .index = MSR_TSC_AUX, .always = false }, // 辅助TSC// TSC 时间戳计数器(Time Stamp Counter){ .index = X2APIC_MSR(APIC_ID), .always = false }, apic id// #define X2APIC_MSR(x) (APIC_BASE_MSR + (x >> 4))// #define APIC_BASE_MSR 0x800// APIC 高级可编程中断控制器(Advanced Programmable Interrupt Controller){ .index = X2APIC_MSR(APIC_LVR), .always = false }, // 传统I2C虚拟寄存器{ .index = X2APIC_MSR(APIC_TASKPRI), .always = false }, // 任务优先权{ .index = X2APIC_MSR(APIC_ARBPRI), .always = false }, // 公断优先权{ .index = X2APIC_MSR(APIC_PROCPRI), .always = false }, // 处理器优先权{ .index = X2APIC_MSR(APIC_EOI), .always = false }, // 退出IO{ .index = X2APIC_MSR(APIC_RRR), .always = false },{ .index = X2APIC_MSR(APIC_LDR), .always = false }, // ldr { .index = X2APIC_MSR(APIC_DFR), .always = false },{ .index = X2APIC_MSR(APIC_SPIV), .always = false },{ .index = X2APIC_MSR(APIC_ISR), .always = false }, // 中断服务寄存器{ .index = X2APIC_MSR(APIC_TMR), .always = false },{ .index = X2APIC_MSR(APIC_IRR), .always = false }, // 中断请求寄存器{ .index = X2APIC_MSR(APIC_ESR), .always = false },{ .index = X2APIC_MSR(APIC_ICR), .always = false },{ .index = X2APIC_MSR(APIC_ICR2), .always = false },/** 注:* AMD不虚拟化APIC TSC期限计时器模式,但它由KVM模拟* 当设置APIC LVTT(0x832)寄存器位18时,中航工业硬件将生成GP故障* 因此,始终拦截MSR 0x832,不要设置direct_access_MSR*/{ .index = X2APIC_MSR(APIC_LVTTHMR), .always = false },{ .index = X2APIC_MSR(APIC_LVTPC), .always = false },{ .index = X2APIC_MSR(APIC_LVT0), .always = 
false },{ .index = X2APIC_MSR(APIC_LVT1), .always = false },{ .index = X2APIC_MSR(APIC_LVTERR), .always = false },{ .index = X2APIC_MSR(APIC_TMICT), .always = false },{ .index = X2APIC_MSR(APIC_TMCCT), .always = false },{ .index = X2APIC_MSR(APIC_TDCR), .always = false },{ .index = MSR_INVALID, .always = false },
};
svm_x86_ops svm(x86)操作结构
/*
 * Run-time x86 operations table for SVM, referenced from svm_init_ops
 * (.runtime_ops) and passed to avic_hardware_setup().
 */
static struct kvm_x86_ops svm_x86_ops __initdata = {
	.name = "kvm_amd",

	/* Undo hardware setup (per the original annotation: frees pages etc.). */
	.hardware_unsetup = svm_hardware_unsetup,
	/*
	 * Per the original annotation: sets up hardware state and refreshes
	 * the virtualized perf events (clears the global enable bits,
	 * reprograms PERF_CTL with the updated perf_ctr_virt_mask, then sets
	 * the global enable bits again).
	 */
	.hardware_enable = svm_hardware_enable,
	/*
	 * Per the original annotation: turns SVM off on the current CPU and
	 * refreshes the virtualized perf events.  Only the host-only bit is
	 * masked out so that host-only counting keeps working while SVM is
	 * disabled; a guest-only counter configured while SVM is disabled
	 * keeps its guest-only bit and counts nothing.
	 */
	.hardware_disable = svm_hardware_disable,
	/* Is the given MSR one that is emulated? */
	.has_emulated_msr = svm_has_emulated_msr,

	.vcpu_create = svm_vcpu_create,
	.vcpu_free = svm_vcpu_free,
	.vcpu_reset = svm_vcpu_reset,

	.vm_size = sizeof(struct kvm_svm),
	.vm_init = svm_vm_init,
	.vm_destroy = svm_vm_destroy,

	.prepare_switch_to_guest = svm_prepare_switch_to_guest,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,
	.vcpu_blocking = avic_vcpu_blocking,
	.vcpu_unblocking = avic_vcpu_unblocking,

	.update_exception_bitmap = svm_update_exception_bitmap,
	.get_msr_feature = svm_get_msr_feature,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = svm_get_cs_db_l_bits,
	.set_cr0 = svm_set_cr0,
	.post_set_cr3 = sev_post_set_cr3,
	.is_valid_cr4 = svm_is_valid_cr4,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.set_dr7 = svm_set_dr7,
	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.get_if_flag = svm_get_if_flag,

	/* Three of the four TLB flush hooks share svm_flush_tlb_current(). */
	.flush_tlb_all = svm_flush_tlb_current,
	.flush_tlb_current = svm_flush_tlb_current,
	.flush_tlb_gva = svm_flush_tlb_gva,
	.flush_tlb_guest = svm_flush_tlb_current,

	.vcpu_pre_run = svm_vcpu_pre_run,
	.vcpu_run = svm_vcpu_run,
	.handle_exit = svm_handle_exit,
	.skip_emulated_instruction = svm_skip_emulated_instruction,
	.update_emulated_instruction = NULL,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.inject_irq = svm_inject_irq,
	.inject_nmi = svm_inject_nmi,
	.inject_exception = svm_inject_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = svm_enable_nmi_window,
	.enable_irq_window = svm_enable_irq_window,
	.update_cr8_intercept = svm_update_cr8_intercept,
	.set_virtual_apic_mode = avic_set_virtual_apic_mode,
	.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
	.check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
	.apicv_post_state_restore = avic_apicv_post_state_restore,

	.get_exit_info = svm_get_exit_info,

	.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.get_l2_tsc_offset = svm_get_l2_tsc_offset,
	.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
	.write_tsc_offset = svm_write_tsc_offset,
	.write_tsc_multiplier = svm_write_tsc_multiplier,

	.load_mmu_pgd = svm_load_mmu_pgd,

	.check_intercept = svm_check_intercept,
	.handle_exit_irqoff = svm_handle_exit_irqoff,

	.request_immediate_exit = __kvm_request_immediate_exit,

	.sched_in = svm_sched_in,

	.nested_ops = &svm_nested_ops,

	.deliver_interrupt = svm_deliver_interrupt,
	.pi_update_irte = avic_pi_update_irte,
	.setup_mce = svm_setup_mce,

	.smi_allowed = svm_smi_allowed,
	.enter_smm = svm_enter_smm,
	.leave_smm = svm_leave_smm,
	.enable_smi_window = svm_enable_smi_window,

	/* Memory-encryption (SEV) hooks. */
	.mem_enc_ioctl = sev_mem_enc_ioctl,
	.mem_enc_register_region = sev_mem_enc_register_region,
	.mem_enc_unregister_region = sev_mem_enc_unregister_region,
	.guest_memory_reclaimed = sev_guest_memory_reclaimed,

	.vm_copy_enc_context_from = sev_vm_copy_enc_context_from,
	.vm_move_enc_context_from = sev_vm_move_enc_context_from,

	.can_emulate_instruction = svm_can_emulate_instruction,

	.apic_init_signal_blocked = svm_apic_init_signal_blocked,

	.msr_filter_changed = svm_msr_filter_changed,
	.complete_emulated_msr = svm_complete_emulated_msr,

	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
};
cpu_hw_events
svm_vcpu_create
reverse_cpuid 逆向(存储)cpuid
struct cpuid_reg { // cpuid 注册u32 function; // 功能u32 index; // 索引int reg; // 寄存器
};static const struct cpuid_reg reverse_cpuid[] = {[CPUID_1_EDX] = { 1, 0, CPUID_EDX},[CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX},[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},[CPUID_1_ECX] = { 1, 0, CPUID_ECX},[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},[CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX},[CPUID_6_EAX] = { 6, 0, CPUID_EAX},[CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX},[CPUID_7_ECX] = { 7, 0, CPUID_ECX},[CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX},[CPUID_7_EDX] = { 7, 0, CPUID_EDX},[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
};
vmcb_save_area 虚拟控制保存区域
/* 为遗留和SEV-MEM客户机保存区域定义 */
struct vmcb_save_area {struct vmcb_seg es; // 虚拟控制段 esstruct vmcb_seg cs;struct vmcb_seg ss;struct vmcb_seg ds;struct vmcb_seg fs;struct vmcb_seg gs;struct vmcb_seg gdtr;struct vmcb_seg ldtr;struct vmcb_seg idtr;struct vmcb_seg tr;u8 reserved_1[42];u8 vmpl;u8 cpl;u8 reserved_2[4];u64 efer;u8 reserved_3[112];u64 cr4;u64 cr3;u64 cr0;u64 dr7;u64 dr6;u64 rflags;u64 rip;u8 reserved_4[88];u64 rsp;u64 s_cet;u64 ssp;u64 isst_addr;u64 rax;u64 star;u64 lstar;u64 cstar;u64 sfmask;u64 kernel_gs_base;u64 sysenter_cs;u64 sysenter_esp;u64 sysenter_eip;u64 cr2;u8 reserved_5[32];u64 g_pat;u64 dbgctl;u64 br_from;u64 br_to;u64 last_excp_from;u64 last_excp_to;u8 reserved_6[72];u32 spec_ctrl; /* SPEC_CTRL在0x2E0的客户版本 */
} __packed;
vmcb_seg
vmcb_seg 虚拟控制段
vmcb 虚拟机控制块(Virtual Machine Control Block)
struct vmcb_seg {u16 selector; // 选择器u16 attrib; // 属性u32 limit; // 限制u64 base; // 基地址
} __packed;
svm_cpu_data 向量机cpu数据
svm 安全虚拟机(Secure Virtual Machine)
struct svm_cpu_data {u64 asid_generation; 通用地址空间标识符// asid 地址空间标识符(address space identifier)u32 max_asid; // 最大值u32 next_asid; // 下一个u32 min_asid; // 最小值struct kvm_ldttss_desc *tss_desc;struct page *save_area; // 保存区域unsigned long save_area_pa; struct vmcb *current_vmcb; 当前的虚拟控制块// vmcb 虚拟机控制块(Virtual Machine Control Block)/* index = sev_asid, value = vmcb pointer */struct vmcb **sev_vmcbs; // 安全加密虚拟控制块// sev 安全加密虚拟化(Secure Encrypted Virtualization)
};
cpu_hw_events cpu硬件事件
struct cpu_hw_events {
	/*
	 * Generic x86 PMC bits
	 * (PMC = performance monitor counter)
	 */
	struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
	unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	int enabled;

	int n_events; /* the # of events in the below arrays */
	int n_added; /* the # of last events in the below arrays; they've never been enabled yet */
	int n_txn; /* the # of last events in the below arrays; added in the current transaction */
	int n_txn_pair;
	int n_txn_metric;
	int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
	u64 tags[X86_PMC_IDX_MAX];

	struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
	struct event_constraint *event_constraint[X86_PMC_IDX_MAX];

	int n_excl; /* the number of exclusive events */

	unsigned int txn_flags;
	int is_fake;

	/*
	 * Intel DebugStore bits
	 */
	struct debug_store *ds;
	void *ds_pebs_vaddr;
	void *ds_bts_vaddr;
	u64 pebs_enabled;
	int n_pebs;
	int n_large_pebs;
	int n_pebs_via_pt;
	int pebs_output;

	/* Current super set of events hardware configuration */
	u64 pebs_data_cfg;
	u64 active_pebs_data_cfg;
	int pebs_record_size;

	/* Intel fixed counter configuration */
	u64 fixed_ctrl_val;
	u64 active_fixed_ctrl_val;

	/*
	 * Intel LBR bits
	 */
	int lbr_users;
	int lbr_pebs_users;
	struct perf_branch_stack lbr_stack;
	struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
	union {
		struct er_account *lbr_sel;
		struct er_account *lbr_ctl;
	};
	u64 br_sel;
	void *last_task_ctx;
	int last_log_id;
	int lbr_select;
	void *lbr_xsave;

	/*
	 * Intel host/guest exclude bits
	 */
	u64 intel_ctrl_guest_mask;
	u64 intel_ctrl_host_mask;
	struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];

	/*
	 * Intel checkpoint mask
	 */
	u64 intel_cp_status;

	/*
	 * Manage shared (per-core, per-cpu) registers
	 * used on Intel NHM/WSM/SNB
	 */
	struct intel_shared_regs *shared_regs;
	/*
	 * Manage exclusive counter access between hyperthreads
	 */
	struct event_constraint *constraint_list; /* in enable order */
	struct intel_excl_cntrs *excl_cntrs;
	int excl_thread_id; /* 0 or 1 */

	/*
	 * SKL TSX_FORCE_ABORT shadow
	 */
	u64 tfa_shadow;

	/*
	 * Perf Metrics
	 */
	/* number of accepted metrics events */
	int n_metric;

	/*
	 * AMD specific bits
	 */
	struct amd_nb *amd_nb;
	int brs_active; /* BRS is enabled */

	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
	u64 perf_ctr_virt_mask;
	int n_pair; /* Large increment events */

	void *kfree_on_online[X86_PERF_KFREE_MAX];

	struct pmu *pmu; /* generic performance monitoring unit */
};
pmu 通用性能监视单元(generic performance monitoring unit)
struct pmu {struct list_head entry; // 列表struct module *module; // 模式struct device *dev; // 设备const struct attribute_group **attr_groups; // 属性组const struct attribute_group **attr_update; // 性能测试相关的属性组const char *name; // 名称int type; // 类型/** 各种常见的per-pmu特性标志*/int capabilities;int __percpu *pmu_disable_count;struct perf_cpu_context __percpu *pmu_cpu_context;atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */int task_ctx_nr;int hrtimer_interval_ms;/* 此PMU可以执行的地址筛选器数 */unsigned int nr_addr_filters;/** 完全禁用/启用PMU,可以用于防止PMI以及MSRs的惰性/批量写入*/void (*pmu_enable) (struct pmu *pmu); /* optional */void (*pmu_disable) (struct pmu *pmu); /* optional *//** 尝试并初始化此PMU的事件** Returns:* -ENOENT -- @event is not for this PMU** -ENODEV -- @event is for this PMU but PMU not present* -EBUSY -- @event is for this PMU but PMU temporarily unavailable* -EINVAL -- @event is for this PMU but @event is not valid* -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported* -EACCES -- @event is for this PMU, @event is valid, but no privileges** 0 -- @event is for this PMU and valid** Other error return values are allowed.*/int (*event_init) (struct perf_event *event);/** 已映射或未映射事件的通知* 在映射任务的上下文中调用*/void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional *//** Flags for ->add()/->del()/ ->start()/->stop(). There are* matching hw_perf_event::state flags.*/
#define PERF_EF_START 0x01 /* 添加时启动计数器 */
#define PERF_EF_RELOAD 0x02 /* 启动时重新加载计数器 */
#define PERF_EF_UPDATE 0x04 /* 停止时更新计数器 *//** 向PMU中添加/从PMU中删除一个计数器,可以在事务中完成,参见->*_txn()方法** 添加/删除回调将保留服务事件所需的所有硬件资源,这包括任何计数器约束调度等** 在禁用IRQs和禁用CPU上的PMU的情况下调用该事件** 在没有PERF_EF_START的情况下调用的add()* 应导致与->add() 后 跟->stop()相同的状态** del()必须总是PERF_EF_UPDATE停止一个事件* 如果它调用->stop(),则必须在没有PERF_EF_UPDATE的情况下处理已经停止的情况*/int (*add) (struct perf_event *event, int flags);void (*del) (struct perf_event *event, int flags);/** 启动/停止PMU上的计数器** 当perf_event_overflow()返回!0时,PMI处理程序应该停止计数器* ->start()将用于继续** 也用于更改采样周期** 在事件发生的CPU上禁用IRQ和禁用PMU的情况下调用--将从NMI上下文调用,PMU生成NMI** 带有PERF_EF_UPDATE的stop()将读取计数器并更新周期/计数值,如->read()** 带有PERF_EF_RELOAD的start()将重新编程计数器值* 前面必须是带有PERF_EFI_UPDATE的->stop()*/void (*start) (struct perf_event *event, int flags);void (*stop) (struct perf_event *event, int flags);/** 更新事件的计数器值** 对于具有采样能力的PMU,这还将更新软件周期hw_perf_event :: period_left字段*/void (*read) (struct perf_event *event);/** 组事件调度被视为一个事务,将组事件作为一个整体添加,并执行一个可调度性测试* 如果测试失败,则回退整个组** 启动事务,之后* this->add()不需要进行可调度性测试** 可选的*/void (*start_txn) (struct pmu *pmu, unsigned int txn_flags);/** 如果->start_txn()禁用了->add()可调度性测试,则->commit_txn()需要执行一个* 如果成功,则关闭事务* 如果出现错误,事务将保持打开状态,直到->cancel_txn()被调用** 可选的*/int (*commit_txn) (struct pmu *pmu);/** 将取消事务,假设->del()被调用为每个成功的->add()在事务中** 可选的*/void (*cancel_txn) (struct pmu *pmu);/** 将返回此事件的perf_event_mmap_page::index的值,如果未提供实现* 则默认为:event->hw.idx+1*/int (*event_idx) (struct perf_event *event); /* 可选的 *//** 上下文切换回调*/void (*sched_task) (struct perf_event_context *ctx,bool sched_in);/** PMU特定数据的Kmem缓存*/struct kmem_cache *task_ctx_cache;/** 任务性能事件上下文的PMU特定部分(即ctx->task_ctx_data)可以使用此函数进行同步* 有关用法示例,请参阅Intel LBR调用堆栈支持实现和Perf核心上下文开关处理回调*/void (*swap_task_ctx) (struct perf_event_context *prev,struct perf_event_context *next);/* 可选的 *//** 为AUX区域设置pmu专用数据结构*/void *(*setup_aux) (struct perf_event *event, void **pages,int nr_pages, bool overwrite);/* 可选的 *//** 释放 pmu-private AUX数据结构*/void (*free_aux) (void *aux); /* 可选的 *//** 在不接触事件状态的情况下拍摄AUX缓冲区的快照,以便抢占->start() /->stop()回调不会干扰其逻辑* 在PMI上下文中调用** 
返回复制到输出句柄的AUX数据的大小** 可选的*/long (*snapshot_aux) (struct perf_event *event,struct perf_output_handle *handle,unsigned long size);/** 验证地址范围筛选器:确保硬件支持请求的配置和筛选器数量* 如果提供的筛选器有效,则返回0,否则返回-errno** 在ioctl()进程的上下文中运行,不会与其他PMU回调一起序列化*/int (*addr_filters_validate) (struct list_head *filters);/* 可选的 *//** 同步地址范围筛选器配置:* 在event::hw::addr_filters中,将hw不可知筛选器转换为硬件配置** 作为过滤器同步序列的一部分运行* 该序列通过调用perf_event_addr_filters_sync()在->start()回调中完成** 可以(并且应该)遍历event::addr_filters::list,其调用者为其提供必要的序列化*/void (*addr_filters_sync) (struct perf_event *event);/* 可选的 *//** 检查事件是否可用于此PMU事件的aux_输出目的** 从perf_event_open()运行* 对于“不匹配”应返回0,对于“匹配”应为非零*/int (*aux_output_match) (struct perf_event *event);/* 可选的 *//** 筛选PMU特定原因的事件*/int (*filter_match) (struct perf_event *event); /* 可选的 *//** 检查PERF_EVENT_IOC_period ioctl的周期值*/int (*check_period) (struct perf_event *event, u64 value); /* 可选的 */
};
kvm_only_cpuid_leafs kvm的CPUID叶
/** 硬件定义的CPUID叶散布在内核中,但需要由KVM直接使用* 注意,这些单词值与内核的“bug”上限冲突,但KVM不使用这些* /
enum kvm_only_cpuid_leafs {CPUID_12_EAX = NCAPINTS, // 20NR_KVM_CPU_CAPS, // 21NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, // 1
};
cpuinfo_x86 每(个)cpu的数据结构
struct cpuinfo_x86 {__u8 x86; /* CPU系列 */__u8 x86_vendor; /* CPU厂商 */__u8 x86_model; /* 模式 */__u8 x86_stepping; /* 步进 */
#ifdef CONFIG_X86_64/* DTLB/ITLB中4K页面的总和(以页为单位) */int x86_tlbsize;
#endif
#ifdef CONFIG_X86_VMX_FEATURE_NAMES__u32 vmx_capability[NVMXINTS];
#endif__u8 x86_virt_bits; /* 虚拟位数 */__u8 x86_phys_bits; /* 物理位数 *//* CPUID返回的核心id位 */__u8 x86_coreid_bits;__u8 cu_id;/* 支持的最大扩展CPUID功能 */__u32 extended_cpuid_level;/* 支持的最大CPUID级别,-1=no CPUID */int cpuid_level;/** 对齐到无符号长的大小,因为x86_capability数组被传递给需要对齐的位操作* 使用未命名的并集强制数组与无符号long的大小对齐*/union {__u32 x86_capability[NCAPINTS + NBUGINTS];unsigned long x86_capability_alignment;};char x86_vendor_id[16];char x86_model_id[64];/* 以KB为单位-对支持此呼叫的CPUS有效 */unsigned int x86_cache_size;int x86_cache_alignment; /* 以字节为单位 *//* 缓存QoS体系结构值,仅在BSP上有效 */int x86_cache_max_rmid; /* 最大指数 */int x86_cache_occ_scale; /* 缩放到字节 */int x86_cache_mbm_width_offset;int x86_power;unsigned long loops_per_jiffy;/* 受保护的处理器识别号 */u64 ppin;/* cpuid返回的最大颜色值 */u16 x86_max_cores;u16 apicid;u16 initial_apicid;u16 x86_clflush_size;/* 操作系统显示的颜色数 */u16 booted_cores;/* 物理处理器id */u16 phys_proc_id;/* 逻辑处理器id */u16 logical_proc_id;/* 核心id */u16 cpu_core_id;u16 cpu_die_id;u16 logical_die_id;/* per_cpu列表索引 */u16 cpu_index;/* SMT在此核心上是否活动? */bool smt_active;u32 microcode;/* 缓存内部使用的地址空间位 */u8 x86_cache_bits;unsigned initialized : 1;
} __randomize_layout;
cpuid_leafs cpuid叶
/*
 * Feature-word indices.  Each enumerator names the CPUID leaf and output
 * register the word is read from; the CPUID_LNX_* words have no entry in
 * reverse_cpuid[].  Numbering is implicit and starts at 0, so the order of
 * the enumerators is significant and must not be changed.
 */
enum cpuid_leafs {
	CPUID_1_EDX		= 0,	/* word  0 */
	CPUID_8000_0001_EDX,		/* word  1 */
	CPUID_8086_0001_EDX,		/* word  2 */
	CPUID_LNX_1,			/* word  3 */
	CPUID_1_ECX,			/* word  4 */
	CPUID_C000_0001_EDX,		/* word  5 */
	CPUID_8000_0001_ECX,		/* word  6 */
	CPUID_LNX_2,			/* word  7 */
	CPUID_LNX_3,			/* word  8 */
	CPUID_7_0_EBX,			/* word  9 */
	CPUID_D_1_EAX,			/* word 10 */
	CPUID_LNX_4,			/* word 11 */
	CPUID_7_1_EAX,			/* word 12 */
	CPUID_8000_0008_EBX,		/* word 13 */
	CPUID_6_EAX,			/* word 14 */
	CPUID_8000_000A_EDX,		/* word 15 */
	CPUID_7_ECX,			/* word 16 */
	CPUID_8000_0007_EBX,		/* word 17 */
	CPUID_7_EDX,			/* word 18 */
	CPUID_8000_001F_EAX,		/* word 19 */
};
__unused_size_checks 重要结构大小检查
虚拟控制保存区域
用户虚拟通讯保存区域
安全加密虚拟化状态保存区域
虚拟控制区域
用户(来宾)虚拟层通信块
static inline void __unused_size_checks(void)
{BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); // 虚拟控制保存区域检查// vmcb 虚拟机控制块(Virtual Machine Control Block)// #define EXPECTED_VMCB_SAVE_AREA_SIZE 740BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); // 用户虚拟通讯保存区域// ghcb 用户(来宾)虚拟层通信块(Guest Hypervisor Communication Block)// #define EXPECTED_GHCB_SAVE_AREA_SIZE 1032BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); // 安全加密虚拟化状态保存区域// sev 安全加密虚拟化(Secure Encrypted Virtualization)// es 加密状态(Encrypted State)// #define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); // 虚拟控制区域// #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); // 用户(来宾)虚拟层通信块// #define EXPECTED_GHCB_SIZE PAGE_SIZE
}
vmcb_save_area
svm_hardware_setup svm硬件相关设置
计算待分配页的阶(order,即2^order个连续页)
如果启用了NX大页面缓解,则影子分页和NPT都需要NX
分配页,获取页面映射的虚拟地址,填充内存(0xFF,每字节)
获取direct_access_msrs(可直接访问的MSR,即模型特定寄存器)列表中的成员
经过偏移运算后写入msrpm_offsets[MSRPM_OFFSETS]列表
将MSR_TSC_AUX登记到kvm_uret_msrs_list列表并记录其槽位
检查PAUSE过滤支持,包括PAUSE过滤拦截(pause filter)、PAUSE过滤阈值(pause filter threshold)
KVM的MMU本身不支持使用2级分页,因此如果主机使用2级分页,
则不支持NPT,因为主机CR4在VMRUN上保持不变
强制VM NPT级别等于主机的分页级别,设置shadow_me_value和shadow_me_mask
kvm内存管理单元设置掩码和值
sev硬件设置
svm_hv硬件设置,设置刷新tlb相关函数
SVM每CPU数据初始化
avic硬件设置(包括LBR虚拟化支持),用于IOMMU驱动调用唤醒vcpu任务
svm设置cpu(寄存器)功能
static __init int svm_hardware_setup(void)
{int cpu;struct page *iopm_pages;void *iopm_va;int r;unsigned int order = get_order(IOPM_SIZE); // 获取待分配页的订单大小// #define IOPM_SIZE PAGE_SIZE * 3/** 如果启用了NX大页面缓解,则影子分页和NPT都需要NX*/if (!boot_cpu_has(X86_FEATURE_NX)) { // 是否设置禁止运行位// #define X86_FEATURE_NX ( 1*32+20) /* 禁用运行 */// NX (No execute) 禁止运行pr_err_ratelimited("NX (Execute Disable) not supported\n");return -EOPNOTSUPP;}kvm_enable_efer_bits(EFER_NX);// #define EFER_NX (1<<_EFER_NX)// #define _EFER_NX 11 /* 启用不执行 */iopm_pages = alloc_pages(GFP_KERNEL, order); // 分配页iopm_va = page_address(iopm_pages); // 获取页面映射的虚拟地址memset(iopm_va, 0xff, PAGE_SIZE * (1 << order)); // 填充内存iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; // 物理页框号左移12位得到基地址init_msrpm_offsets(); // 获取kvm模式特征寄存器列表中的成员,经过偏移运算后写入msrpm_offsets[MSRPM_OFFSETS]列表
svm_direct_access_msrs
kvm_caps.supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS |XFEATURE_MASK_BNDCSR); // supported_xcr0 移除这两个标志...kvm_caps.max_tsc_scaling_ratio = SVM_TSC_RATIO_MAX;kvm_caps.tsc_scaling_ratio_frac_bits = 32;tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); // 获取/设置标志位,并放入kvm_uret_msrs_list列表/* 检查暂停过滤支持 */if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { // 滤波暂停截距pause_filter_count = 0;pause_filter_thresh = 0;} else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { // 暂停过滤器阈值pause_filter_thresh = 0;}if (nested) {printk(KERN_INFO "kvm: Nested Virtualization enabled\n");kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);}/** KVM的MMU本身不支持使用2级分页,因此如果主机使用2级寻呼,* 则不支持NPT,因为主机CR4在VMRUN上保持不变*/if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))npt_enabled = false;if (!boot_cpu_has(X86_FEATURE_NPT))npt_enabled = false;/* 强制VM NPT级别等于主机的分页级别 */kvm_configure_mmu(npt_enabled, get_npt_level(),get_npt_level(), PG_LEVEL_1G);/* 设置shadow_me_value和shadow_mo_mask */kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);svm_adjust_mmio_mask(); // kvm内存管理单元设置掩码和值
svm_adjust_mmio_mask
/** SEV设置使用npt_enabled和enable_mmio_catching(可以通过svm_adjust_mmio_mask()修改)*/sev_hardware_setup(); // sev硬件设置
sev_hardware_setup
svm_hv_hardware_setup(); // svm_hv硬件设置
// 设置刷新tlb相关函数/** 这是AMD特有的,并指定支持开明TLB刷新* 如果用户选择此功能,ASID失效仅刷新gva->hpa映射条目* 要刷新从NPT派生的TLB条目,* 应使用超级调用(HvFlushGuestPhysicalAddressSpace或HvFlush GuestPhysical AddressList)* /
// #define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22) /* 嵌套的功能. 这些是 HYPERV_CPUID_NESTED_FEATURES.EAX 位. */
// #define HV_X64_NESTED_DIRECT_FLUSH BIT(17)for_each_possible_cpu(cpu) {r = svm_cpu_init(cpu); // 向量机cpu初始化if (r)goto err;}
svm_cpu_data
if (nrips) { // 启用下一次RIP保存if (!boot_cpu_has(X86_FEATURE_NRIPS))nrips = false;}enable_apicv = avic = avic && avic_hardware_setup(&svm_x86_ops); // avic硬件设置,用于IOMMU驱动调用唤醒vcpu任务
svm_x86_ops
avic_hardware_setup
if (vls) {
// 启用/禁用虚拟VMLOAD VMSAVE
// static int vls = true;
// module_param(vls, int, 0444);if (!npt_enabled ||!boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||!IS_ENABLED(CONFIG_X86_64)) { // 都有效,执行下面分支// #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */vls = false;} else {pr_info("Virtual VMLOAD VMSAVE supported\n");}}if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))// #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME地址检查 */svm_gp_erratum_intercept = false;if (vgif) {if (!boot_cpu_has(X86_FEATURE_VGIF))// #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */// 启用此功能后,处理器在执行STGI/CLGI指令时使用60h位置的第9位作为虚拟GIF// 即使没有设置GIF,也会打开IRQ窗口,假设在恢复L1 hypervisor时,IRQ将保持挂起状态,直到处理器执行STGI指令// 对于NMI窗口,设置了STGI拦截。这将有助于仅在GIF=1时打开窗口// NMI 不可屏蔽中断(Nonmaskable Interrupt)vgif = false;elsepr_info("Virtual GIF supported\n");}if (lbrv) {if (!boot_cpu_has(X86_FEATURE_LBRV))// #define X86_FEATURE_LBRV (15*32+ 1) /* LBR虚拟化支持 */// LBR 最后分支记录(Last Branch Records)lbrv = false;elsepr_info("LBR virtualization supported\n");}svm_set_cpu_caps(); // svm设置cpu(寄存器)功能
svm_set_cpu_caps
/** 似乎在AMD处理器上,PTE的访问位是由CPU硬件在NPF vmexit之前设置的* 这不是预期的行为,我们的测试因此失败* 这里的一个解决方法是禁用对GUEST_MAXPHYADDR < HOST_MAXPHYADDR的支持,如果NPT是启用的* 在这种情况下,用户空间可以知道是否有支持使用KVM_CAP_SMALLER_MAXPHYADDR扩展,并决定如何处理它* 如果未来的AMD CPU模型改变上述行为,该变量可以相应改变*/allow_smaller_maxphyaddr = !npt_enabled;return 0;err:svm_hardware_unsetup();return r;
}
svm_adjust_mmio_mask kvm内存管理单元设置掩码和值
/** 默认MMIO掩码为单个位(不包括当前位),可能与内存加密位冲突* 检查内存加密支持,如果启用了内存加密,则覆盖默认MMIO掩码* /
static __init void svm_adjust_mmio_mask(void)
{unsigned int enc_bit, mask_bit;u64 msr, mask;/* 如果不支持内存加密,请使用已有的掩码 */if (cpuid_eax(0x80000000) < 0x8000001f)return;/* 如果未启用内存加密,请使用现有掩码 */rdmsrl(MSR_AMD64_SYSCFG, msr);enc_bit = cpuid_ebx(0x8000001f) & 0x3f; mask_bit = boot_cpu_data.x86_phys_bits;/* 如果掩码位与加密位相同,则递增掩码位 */if (enc_bit == mask_bit)mask_bit++;/** 如果掩码位位置低于52,那么将始终保留物理寻址限制以上的一些位,因此使用rsvd_bits()函数生成掩码*/mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); // kvm内存管理单元设置掩码和值
}
sev_hardware_setup sev硬件设置
检查CPU是否支持解码辅助
获取SEV CPUID信息
为SEV-ES用户(来宾)设置加密位位置
以及同时支持的最大加密用户数
SEV用户应使用的最小ASID值
初始化SEV ASID位图
设置杂项控制组MISC_CG_RES_SEV类型的容量
设置杂项控制组MISC_CG_RES_SEV_ES类型的容量
/*
 * Probe SEV / SEV-ES support and set up the ASID bookkeeping.
 *
 * On exit sev_enabled and sev_es_enabled reflect what was actually set up;
 * every early exit path leaves the corresponding feature disabled.  The
 * whole body is compiled out without CONFIG_KVM_AMD_SEV.
 */
void __init sev_hardware_setup(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
	bool sev_es_supported = false;
	bool sev_supported = false;

	if (!sev_enabled || !npt_enabled)
		goto out;

	/*
	 * SEV must obviously be supported in hardware.  Sanity check that the
	 * CPU supports decode assists, which is mandatory for SEV guests to
	 * support instruction emulation.
	 */
	if (!boot_cpu_has(X86_FEATURE_SEV) ||
	    WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;
	if (!max_sev_asid)
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;
	sev_me_mask = 1UL << (ebx & 0x3f);

	/*
	 * Initialize SEV ASID bitmaps.  Allocate space for ASID 0 in the
	 * bitmap, even though it's never used, so that the bitmap is indexed
	 * by the actual ASID.
	 */
	nr_asids = max_sev_asid + 1;
	sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap) {
		/* Do not leave a half-initialised pair of bitmaps behind. */
		bitmap_free(sev_asid_bitmap);
		sev_asid_bitmap = NULL;
		goto out;
	}

	/* Number of ASIDs usable by SEV guests. */
	sev_asid_count = max_sev_asid - min_sev_asid + 1;
	/* Publish the capacity to the misc cgroup controller. */
	if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
		goto out;

	pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
	sev_supported = true;

	/* SEV-ES support requested? */
	if (!sev_es_enabled)
		goto out;

	/*
	 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
	 * instruction stream, i.e. can't emulate in response to a #NPF and
	 * instead relies on #NPF(RSVD) being reflected into the guest as #VC
	 * (the guest can then do a #VMGEXIT to request MMIO emulation).
	 */
	if (!enable_mmio_caching)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

	/* ASIDs 1 .. min_sev_asid - 1 are the ones available to SEV-ES. */
	sev_es_asid_count = min_sev_asid - 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count);
	sev_es_supported = true;

out:
	sev_enabled = sev_supported;
	sev_es_enabled = sev_es_supported;
#endif
}
svm_vcpu_create
static int svm_vcpu_create(struct kvm_vcpu *vcpu)
{struct vcpu_svm *svm;struct page *vmcb01_page;struct page *vmsa_page = NULL;int err;BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0); // struct kvm_vcpu vcpu是vcpu_svm结构的第一个成员svm = to_svm(vcpu); // 偏移到vcpu_svm结构对象// 通过container_oferr = -ENOMEM;vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); // 分配干净页if (sev_es_guest(vcpu->kvm)) {/** SEV-ES客户机需要一个单独的VMSA页面,用于包含客户机的加密寄存器状态*/vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);/** SEV-ES客户维护其FPU状态的加密版本,该版本将恢复并保存在VMRUN和VMEXIT上* 标记vcpu->arch.guest_fpu->fpstate为scratch,因此它不会对其执行xsave/xrstor*/fpstate_set_confidential(&vcpu->arch.guest_fpu);// guest_fpu->fpstate->is_confidential = true;}err = avic_init_vcpu(svm); // // avic 高级虚拟中断控制器(Advanced Virtual Interrupt Controller)
avic_init_backing_page
avic_init_backing_page
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{u64 *entry, new_entry;int id = vcpu->vcpu_id; // 虚拟cpu idstruct vcpu_svm *svm = to_svm(vcpu);if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||(avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID)) // 这里模式为AVIC_MODE_NONEreturn -EINVAL;if (!vcpu->arch.apic->regs)return -EINVAL;if (kvm_apicv_activated(vcpu->kvm)) {int ret;ret = avic_alloc_access_page(vcpu->kvm);if (ret)return ret;}
avic_hardware_setup avic硬件设置,用于IOMMU驱动调用唤醒vcpu任务
avic 高级虚拟中断控制器(Advanced Virtual Interrupt Controller)
bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
{if (!npt_enabled)return false;if (boot_cpu_has(X86_FEATURE_AVIC)) { // 符合条件// #define X86_FEATURE_AVIC (15*32+13) /* 虚拟中断控制器 */// extern struct cpuinfo_x86 boot_cpu_data;avic_mode = AVIC_MODE_X1;pr_info("AVIC enabled\n");} else if (force_avic) {/** Some older systems does not advertise AVIC support.* See Revision Guide for specific AMD processor for more detail.*/avic_mode = AVIC_MODE_X1; // x1 高级虚拟中断控制器 (最高支持255个虚拟cpu)pr_warn("AVIC is not supported in CPUID but force enabled");pr_warn("Your system might crash and burn");}/* AVIC是x2AVIC的先决条件 */if (boot_cpu_has(X86_FEATURE_X2AVIC)) { // 符合条件// #define X86_FEATURE_X2AVIC (15*32+18) /* 虚拟x2apic */if (avic_mode == AVIC_MODE_X1) {avic_mode = AVIC_MODE_X2; // x2 高级虚拟中断控制器 (最高支持511个虚拟cpu)pr_info("x2AVIC enabled\n");} else {pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");pr_warn(FW_BUG "Try enable AVIC using force_avic option");}}if (avic_mode != AVIC_MODE_NONE)amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); // 注册avic_ga_log_notifier函数 // avic_ga_log_notifier 赋值到iommu_ga_log_notifier// 该函数从IOMMU驱动调用,通知SVM调度特定虚拟机的特定vCPU// 用于唤醒vCPU的任务return !!avic_mode;
}
kvm_vcpu_wake_up
kvm_vcpu_wake_up kvm虚拟cpu唤醒任务
通过kvm_vcpu结构的rcuwait对象
获取待唤醒的任务结构
然后通过wake_up_process函数唤醒任务
通过kvm_vcpu_stat结构的kvm_vcpu_stat_generic对象的成员
halt_wakeup自增表示已被唤醒
/*
 * Wake the task backing @vcpu.
 *
 * When __kvm_vcpu_wake_up() reports that a task was woken, mark the vCPU as
 * ready and bump its halt_wakeup statistic.
 *
 * Return: true if a wake-up happened, false otherwise.
 */
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
	/* Nothing was woken: leave the ready flag and the statistics alone. */
	if (!__kvm_vcpu_wake_up(vcpu))
		return false;

	WRITE_ONCE(vcpu->ready, true);
	++vcpu->stat.generic.halt_wakeup;

	return true;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
||
\/
/*
 * Wake the task recorded in @w, if any.
 *
 * Returns the result of wake_up_process() for that task, or 0 when no task
 * is currently recorded in @w.
 */
int rcuwait_wake_up(struct rcuwait *w)
{
	int ret = 0;
	struct task_struct *task;

	rcu_read_lock();

	/*
	 * Order condition vs @task, such that everything prior to the load
	 * of @task is visible. This is the condition as to why the user called
	 * rcuwait_wake_up() in the first place. Pairs with the
	 * set_current_state() barrier (A) on the waiting side.
	 *
	 *    WAIT                WAKE
	 *    [S] tsk = current	  [S] cond = true
	 *        MB (A)	      MB (B)
	 *    [L] cond		  [L] tsk
	 */
	smp_mb(); /* (B) */

	task = rcu_dereference(w->task);
	if (task)
		ret = wake_up_process(task);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rcuwait_wake_up);
svm_set_cpu_caps svm设置cpu(寄存器)功能
static __init void svm_set_cpu_caps(void)
{kvm_set_cpu_caps(); // kvm设置cpu(寄存器)功能
kvm_set_cpu_caps
kvm_caps.supported_xss = 0;/* CPUID 0x80000001 and 0x8000000A (SVM features) */if (nested) {kvm_cpu_cap_set(X86_FEATURE_SVM);// #define X86_FEATURE_SVM ( 6*32+ 2) /* 安全虚拟机 */kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);// #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB清洁位支持 */if (nrips)kvm_cpu_cap_set(X86_FEATURE_NRIPS);// #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" 支持向量机next_rip保存 */if (npt_enabled)kvm_cpu_cap_set(X86_FEATURE_NPT);// #define X86_FEATURE_NPT (15*32+ 0) /* 嵌套页表支持 */if (tsc_scaling)kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);// #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC缩放支持 */if (vls)kvm_cpu_cap_set(X86_FEATURE_V_VMSAVE_VMLOAD);// #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */if (lbrv)kvm_cpu_cap_set(X86_FEATURE_LBRV);// #define X86_FEATURE_LBRV (15*32+ 1) /* LBR虚拟化支持 */if (boot_cpu_has(X86_FEATURE_PAUSEFILTER))kvm_cpu_cap_set(X86_FEATURE_PAUSEFILTER);// #define X86_FEATURE_PAUSEFILTER (15*32+10) /* 滤波暂停截断 */if (boot_cpu_has(X86_FEATURE_PFTHRESHOLD))kvm_cpu_cap_set(X86_FEATURE_PFTHRESHOLD);// #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* 暂停过滤器阈值 */if (vgif)kvm_cpu_cap_set(X86_FEATURE_VGIF);// #define X86_FEATURE_VGIF (15*32+16) /* 虚拟GIF *//* 嵌套的VM可以接收#VMEXIT而不是触发#GP */kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);// #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME地址检查 */}/* CPUID 0x80000008 */if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||boot_cpu_has(X86_FEATURE_AMD_SSBD))kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);// #define X86_FEATURE_VIRT_SSBD (13*32+25) /* 虚拟化预测存储旁路关闭 *//* AMD PMU PERFCTR_CORE CPUID */if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);// #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* 核心性能计数器扩展 *//* CPUID 0x8000001F (SME/SEV 特征) */sev_set_cpu_caps(); // 如果特征存在,清除// #define X86_FEATURE_SEV (19*32+ 1) /* AMD安全加密虚拟化 */// #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD安全加密虚拟化-加密状态 */
}
kvm_set_cpu_caps kvm设置cpu(寄存器)功能
void kvm_set_cpu_caps(void)
{
#ifdef CONFIG_X86_64unsigned int f_gbpages = F(GBPAGES); // X86_FEATURE_GBPAGES// F 检查并返回特征位// #define X86_FEATURE_GBPAGES ( 1*32+26) /* “pdpe1gb”GB页面 */unsigned int f_lm = F(LM); // X86_FEATURE_LM// #define X86_FEATURE_LM ( 1*32+29) /* 长模式 (x86-64, 64-bit support) */unsigned int f_xfd = F(XFD); // X86_FEATURE_XFD // #define X86_FEATURE_XFD (10*32+ 4) /* 扩展功能禁用 */
#elseunsigned int f_gbpages = 0;unsigned int f_lm = 0;unsigned int f_xfd = 0;
#endif
F
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) >sizeof(boot_cpu_data.x86_capability)); // NKVMCAPINTS 1memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); // 拷贝x86_capability数组地址
kvm_only_cpuid_leafs
cpuinfo_x86
kvm_cpu_cap_mask(CPUID_1_ECX,/** 注意:MONITOR(和MWAIT)被模拟为NOP,但*不*通过CPUID通告给客人*/F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |0 /* DS-CPL, VMX, SMX, EST */ |0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |F(F16C) | F(RDRAND)); // 掩码放入(或运算)kvm_cpu_caps[CPUID_1_ECX]中,表示CPUID_1_ECX寄存器支持功能// 用这个CPU的原始CPUID功能屏蔽leaf(叶)的kvm_cpu_caps// #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */// #define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ指令 */// #define X86_FEATURE_SSSE3 ( 4*32+ 9) /* 补充的 SSE-3 */// #define X86_FEATURE_FMA ( 4*32+12) /* 积和熔加运算 */// #define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B指令 */// #define X86_FEATURE_PDCM ( 4*32+15) /* 性能/调试能力MSR */// #define X86_FEATURE_PCID ( 4*32+17) /* 进程上下文标识符 */// #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */// #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */// #define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */// #define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE指令 */// #define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT指令 */// #define X86_FEATURE_AES ( 4*32+25) /* AES指令 */// #define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV指令 */// #define X86_FEATURE_AVX ( 4*32+28) /* 高级矢量扩展指令集 */// #define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP 转换 */// #define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND指令 */
cpuid_leafs
reverse_cpuid
/* KVM在软件中模拟x2apic,而不考虑主机支持 */kvm_cpu_cap_set(X86_FEATURE_X2APIC);kvm_cpu_cap_mask(CPUID_1_EDX,F(FPU) | F(VME) | F(DE) | F(PSE) |F(TSC) | F(MSR) | F(PAE) | F(MCE) |F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |0 /* Reserved, DS, ACPI */ | F(MMX) |F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |0 /* HTT, TM, Reserved, PBE */); // 掩码放入kvm_cpu_caps[CPUID_1_EDX]中,表示CPUID_1_EDX寄存器支持功能// #define X86_FEATURE_FPU ( 0*32+ 0) /* 机载(板卡)FPU */// #define X86_FEATURE_VME ( 0*32+ 1) /* 虚拟模式扩展 */// #define X86_FEATURE_DE ( 0*32+ 2) /* 调试扩展 */// #define X86_FEATURE_PSE ( 0*32+ 3) /* 页面大小扩展 */// #define X86_FEATURE_TSC ( 0*32+ 4) /* 时间戳计数器 */// #define X86_FEATURE_MSR ( 0*32+ 5) /* 特定模块寄存器 */// #define X86_FEATURE_PAE ( 0*32+ 6) /* 物理地址扩展 */// #define X86_FEATURE_MCE ( 0*32+ 7) /* 机器检查异常 */// #define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8指令 */// #define X86_FEATURE_APIC ( 0*32+ 9) /* 机载(板卡)APIC */// #define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */// #define X86_FEATURE_MTRR ( 0*32+12) /* 存储器型态范围寄存器 */// #define X86_FEATURE_PGE ( 0*32+13) /* 页面全局启用 */// #define X86_FEATURE_MCA ( 0*32+14) /* 机器检查体系结构 */// #define X86_FEATURE_CMOV ( 0*32+15) /* CMOV指令 (plus FCMOVcc, FCOMI with FPU) */// #define X86_FEATURE_PAT ( 0*32+16) /* 页属性表 */// #define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */// #define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH指令 */// #define X86_FEATURE_MMX ( 0*32+23) /* 多媒体扩展 */// #define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */// #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */// #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */// #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss"CPU自检 */kvm_cpu_cap_mask(CPUID_7_0_EBX,F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |F(CLFLUSHOPT) | F(CLWB) | 0 
/*INTEL_PT*/ | F(AVX512PF) |F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |F(AVX512VL)); // CPUID_7_0_EBXkvm_cpu_cap_mask(CPUID_7_ECX,F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |F(SGX_LC) | F(BUS_LOCK_DETECT)); // CPUID_7_ECX/* 根据硬件能力设置LA57 */if (cpuid_ecx(7) & F(LA57))kvm_cpu_cap_set(X86_FEATURE_LA57);/** PKU尚未实现影子分页,需要在主机上设置OSPKE* 如果情况并非如此,请清除它*/if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))kvm_cpu_cap_clear(X86_FEATURE_PKU);kvm_cpu_cap_mask(CPUID_7_EDX,F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16)); // CPUID_7_EDX /* 在软件中模拟TSC_ADJUST和ARCH_CAPABILITIES */kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))// #define X86_FEATURE_IBPB ( 7*32+26) /* 间接分支预测障碍 */// #define X86_FEATURE_IBRS ( 7*32+25) /* 间接分支限制预测 */kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL);// #define X86_FEATURE_SPEC_CTRL (18*32+26) /* 预测控制 (IBRS + IBPB) */if (boot_cpu_has(X86_FEATURE_STIBP))// #define X86_FEATURE_STIBP ( 7*32+27) /* 单线程间接分支预测器 */kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);// #define X86_FEATURE_STIBP ( 7*32+27) /* 单线程间接分支预测器 */if (boot_cpu_has(X86_FEATURE_AMD_SSBD))// #define X86_FEATURE_AMD_SSBD (13*32+24) /* 预测存储旁路禁用 */kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);// #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* 预测存储旁路关闭 */kvm_cpu_cap_mask(CPUID_7_1_EAX,F(AVX_VNNI) | F(AVX512_BF16)); // CPUID_7_1_EAX能力// #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI 指令 */// #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 指令 */kvm_cpu_cap_mask(CPUID_D_1_EAX,F(XSAVEOPT) | F(XSAVEC) | 
F(XGETBV1) | F(XSAVES) | f_xfd); // CPUID_D_1_EAX能力kvm_cpu_cap_init_scattered(CPUID_12_EAX,SF(SGX1) | SF(SGX2)); // 掩码赋值(=)kvm_cpu_caps[CPUID_12_EAX]中,表示CPUID_12_EAX能力// #define X86_FEATURE_SGX1 (11*32+ 8) /* 基本SGX */// #define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave动态内存管理(EDMM) */kvm_cpu_cap_mask(CPUID_8000_0001_ECX,F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |F(TOPOEXT) | 0 /* PERFCTR_CORE */); // CPUID_8000_0001_ECXkvm_cpu_cap_mask(CPUID_8000_0001_EDX,F(FPU) | F(VME) | F(DE) | F(PSE) |F(TSC) | F(MSR) | F(PAE) | F(MCE) |F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |F(PAT) | F(PSE36) | 0 /* Reserved */ |F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)); // CPUID_8000_0001_EDXif (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))kvm_cpu_cap_set(X86_FEATURE_GBPAGES);// #define X86_FEATURE_GBPAGES ( 1*32+26) /* “pdpe1gb”GB页面 */kvm_cpu_cap_mask(CPUID_8000_0008_EBX,F(CLZERO) | F(XSAVEERPTR) |F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |__feature_bit(KVM_X86_FEATURE_PSFD)); // CPUID_8000_0008_EBX/** AMD为每个SPEC_CTRL位具有单独的位* arch/x86/kernel/cpu/bugs.c很好地将其记录在cpufeatures中,所以使用它们*/if (boot_cpu_has(X86_FEATURE_IBPB))kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB);// #define X86_FEATURE_AMD_IBPB (13*32+12) /* 间接分支预测障碍 */if (boot_cpu_has(X86_FEATURE_IBRS))kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS);// #define X86_FEATURE_AMD_IBRS (13*32+14) /* 间接分支限制预测 */if (boot_cpu_has(X86_FEATURE_STIBP))kvm_cpu_cap_set(X86_FEATURE_AMD_STIBP);// #define X86_FEATURE_AMD_STIBP (13*32+15) /* 单线程间接分支预测器 */if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))kvm_cpu_cap_set(X86_FEATURE_AMD_SSBD);// #define X86_FEATURE_AMD_SSBD (13*32+24) /* 预测存储旁路禁用 
*/if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))kvm_cpu_cap_set(X86_FEATURE_AMD_SSB_NO);// #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* 预测存储旁路在硬件上是固定的 *//** 首选使用SPEC CTRL MSR,而不是VIRT_SPEC MSR*/if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&!boot_cpu_has(X86_FEATURE_AMD_SSBD))kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);/** 默认情况下隐藏所有SVM特征,SVM将为它模拟的特征和/或暴露的L1设置上限位*/kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0);kvm_cpu_cap_mask(CPUID_8000_001F_EAX,0 /* SME */ | F(SEV) | 0 /* VM_PAGE_FLUSH */ | F(SEV_ES) |F(SME_COHERENT)); // CPUID_8000_001F_EAXkvm_cpu_cap_mask(CPUID_C000_0001_EDX,F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |F(PMM) | F(PMM_EN)); // CPUID_C000_0001_EDX/** 隐藏RDTSCP和RDPID,如果其中一个特性被报告为支持,但探测MSR_TSC_AUX失败* 这纯粹是一个健全检查,不应该发生,但是如果RDTSCP或RDPID被错误报告,客户机可能会崩溃,* 而且KVM过去曾搞砸过MSR_TSC_AUX模拟* 例如,如果这个KVM实例在旧的、损坏的KVM上以L1的身份运行,则可能触发完整性检查*/if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&!kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);kvm_cpu_cap_clear(X86_FEATURE_RDPID);}
}
EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
F 检查并返回特征位
/* F(name): shorthand for feature_bit(name); checks and returns the feature bit. */
#define F feature_bit
||
\/
/* Pastes the X86_FEATURE_ prefix onto @name and passes the result to __feature_bit(). */
#define feature_bit(name) __feature_bit(X86_FEATURE_##name)
||
\/
/** 从X86_FEATURE_*定义中检索位掩码* 特性包含硬件定义的位数(存储在位4:0中)和软件定义的“字”(存储在位31:5中)* 这个词用于索引到包含每个cpu特性功能的位掩码数组中,例如this_cpu_has()* /
static __always_inline u32 __feature_bit(int x86_feature)
{x86_feature = __feature_translate(x86_feature); // 返回特征,包括SGX1、SGX2和直接返回// #define KVM_X86_FEATURE(w, f) ((w)*32 + (f))// CPUID_12_EAX 20// #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)// KVM_X86_FEATURE_SGX1 (20 * 32 + 0)// #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)// KVM_X86_FEATURE_SGX2 (20 * 32 + 1)/** 反向CPUID及其派生只能用于硬件定义的特征字,即其位直接对应于CPUID叶的字* 从Linux定义的字中检索功能位或屏蔽来宾CPUID是没有意义的* 因为位号/掩码是任意软件定义的值,KVM无法使用它来查询/控制来宾功能* 显然,被查询的叶必须在查找表中有一个条目* /reverse_cpuid_check(x86_feature / 32); // 检查特征return 1 << (x86_feature & 31);
}