Discussion:
[PATCH 0/2] KVM: add support for Pause-Loop exit
Andy Whitcroft
2010-03-05 14:42:20 UTC
Add kernel-side support for KVM Pause-Loop Exiting. This allows virtual
machines stuck in long lock spins to relinquish the CPU to other VMs.
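For context, this is the guest-side pattern the feature targets: a vcpu
spinning on a lock whose holder has been preempted on the host. A minimal
sketch (illustrative only, not code from this series; on x86, cpu_relax()
compiles to the PAUSE instruction):

#include <linux/spinlock.h>

static void guest_side_spin(spinlock_t *lock)
{
	/*
	 * Each failed iteration executes PAUSE; if the lock holder's
	 * vcpu is scheduled out on the host, this burns the whole
	 * timeslice making no progress.
	 */
	while (!spin_trylock(lock))
		cpu_relax();
}

With Pause-Loop Exiting, a dense run of PAUSEs like this triggers a VM
exit instead, letting KVM yield the physical CPU.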

-apw

Zhai, Edwin (2):
KVM: introduce kvm_vcpu_on_spin
KVM: VMX: Add support for Pause-Loop Exiting

arch/x86/include/asm/vmx.h | 4 +++
arch/x86/kvm/vmx.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 15 +++++++++++++
4 files changed, 70 insertions(+), 1 deletions(-)
Andy Whitcroft
2010-03-05 14:42:21 UTC
From: Zhai, Edwin <edwin.zhai at intel.com>

Introduce kvm_vcpu_on_spin, to be used by VMX/SVM to yield the processor
once the cpu detects pause-based looping.

Signed-off-by: "Zhai, Edwin" <edwin.zhai at intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>
Signed-off-by: Andy Whitcroft <apw at canonical.com>
---
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 15 +++++++++++++++
2 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b7bbb5d..b5bcb4f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -266,6 +266,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7495ce3..faa40d9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1705,6 +1705,21 @@ void kvm_resched(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_resched);

+void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu)
+{
+ ktime_t expires;
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+
+ /* Sleep for 100 us, and hope the lock-holder gets scheduled */
+ expires = ktime_add_ns(ktime_get(), 100000UL);
+ schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+
+ finish_wait(&vcpu->wq, &wait);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
+
static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct kvm_vcpu *vcpu = vma->vm_file->private_data;
--
1.7.0
Andy Whitcroft
2010-03-05 14:42:22 UTC
From: Zhai, Edwin <edwin.zhai at intel.com>

New NHM (Nehalem) processors will support Pause-Loop Exiting by adding
two VM-execution control fields:
PLE_Gap    - upper bound on the amount of time between two successive
             executions of PAUSE in a loop.
PLE_Window - upper bound on the amount of time a guest is allowed to
             execute in a PAUSE loop.

If the time between this execution of PAUSE and the previous one exceeds
PLE_Gap, the processor considers this PAUSE to belong to a new loop.
Otherwise, the processor determines the total execution time of this loop
(since the first PAUSE in the loop) and triggers a VM exit if that total
exceeds PLE_Window. See the SDM, volume 3b, sections 21.6.13 and 22.1.3.
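In C-like pseudocode, the check the processor performs on each guest
PAUSE behaves roughly as follows (a sketch of the architectural behavior
only; the state and helper names, including vm_exit(), are invented for
clarity):

#include <linux/types.h>

#define PLE_GAP_TICKS    41	/* default ple_gap    */
#define PLE_WINDOW_TICKS 4096	/* default ple_window */

static u64 first_pause_time;	/* start of the current PAUSE loop */
static u64 last_pause_time;	/* most recent PAUSE seen          */

/* Conceptually invoked by the CPU on each guest PAUSE; 'now' is a
 * counter running at the TSC rate. */
static void on_guest_pause(u64 now)
{
	if (now - last_pause_time > PLE_GAP_TICKS)
		first_pause_time = now;	/* gap exceeded: new loop */
	else if (now - first_pause_time > PLE_WINDOW_TICKS)
		vm_exit(EXIT_REASON_PAUSE_INSTRUCTION);	/* spun too long */

	last_pause_time = now;
}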

Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where
one VP (virtual processor) is scheduled out while holding a spinlock and
other VPs contending for the same lock are scheduled in, wasting CPU time.

Our tests indicate that most spinlocks are held for less than 2^12 cycles.
Performance tests show that with 2x LP (logical processor) over-commitment
we get a +2% performance improvement for kernel builds (with even more
gain at higher LP counts).

Signed-off-by: Zhai Edwin <edwin.zhai at intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>
Signed-off-by: Andy Whitcroft <apw at canonical.com>
---
arch/x86/include/asm/vmx.h | 4 +++
arch/x86/kvm/vmx.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 54 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 272514c..2b49454 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -56,6 +56,7 @@
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400


#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -144,6 +145,8 @@ enum vmcs_field {
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
@@ -248,6 +251,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed53b42..3b78663 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,25 @@ module_param_named(unrestricted_guest,
static int __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

+/*
+ * These two parameters are used to configure the controls for Pause-Loop
+ * Exiting:
+ * ple_gap: upper bound on the amount of time between two successive
+ * executions of PAUSE in a loop. Also indicates whether PLE is enabled.
+ * According to tests, this time is usually smaller than 41 cycles.
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
+ * in a PAUSE loop. Tests indicate that most spinlocks are held for
+ * less than 2^12 cycles.
+ * Time is measured on a counter that runs at the same rate as the TSC;
+ * see SDM volume 3b, sections 21.6.13 & 22.1.3.
+ */
+#define KVM_VMX_DEFAULT_PLE_GAP 41
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
+module_param(ple_gap, int, S_IRUGO);
+
+static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
+module_param(ple_window, int, S_IRUGO);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -320,6 +339,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
SECONDARY_EXEC_UNRESTRICTED_GUEST;
}

+static inline int cpu_has_vmx_ple(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+}
+
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
return flexpriority_enabled &&
@@ -1250,7 +1275,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ SECONDARY_EXEC_UNRESTRICTED_GUEST |
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -1394,6 +1420,9 @@ static __init int hardware_setup(void)
if (enable_ept && !cpu_has_vmx_ept_2m_page())
kvm_disable_largepages();

+ if (!cpu_has_vmx_ple())
+ ple_gap = 0;
+
return alloc_kvm_area();
}

@@ -2306,9 +2335,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ if (!ple_gap)
+ exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}

+ if (ple_gap) {
+ vmcs_write32(PLE_GAP, ple_gap);
+ vmcs_write32(PLE_WINDOW, ple_window);
+ }
+
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
@@ -3358,6 +3394,18 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
}

/*
+ * Indicate that a vcpu is busy-waiting on a spinlock. We do not enable
+ * plain PAUSE exiting, so we only get here on CPUs with Pause-Loop Exiting.
+ */
+static int handle_pause(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ skip_emulated_instruction(vcpu);
+ kvm_vcpu_on_spin(vcpu);
+
+ return 1;
+}
+
+/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
* to be done to userspace and return 0.
@@ -3394,6 +3442,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
+ [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
};

static const int kvm_vmx_max_exit_handlers =
--
1.7.0
Tim Gardner
2010-03-05 15:54:28 UTC
Post by Andy Whitcroft
Add kernel side support for KVM Pause-Loop exit. This allows long lock
spins in virtual machines to relinquish CPUs to other VMs.
-apw
KVM: introduce kvm_vcpu_on_spin
KVM: VMX: Add support for Pause-Loop Exiting
arch/x86/include/asm/vmx.h | 4 +++
arch/x86/kvm/vmx.c | 51 +++++++++++++++++++++++++++++++++++++++++++-
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 15 +++++++++++++
4 files changed, 70 insertions(+), 1 deletions(-)
These two clean upstream cherry-picks appear to preserve existing behavior
if the module parameters ple_gap and ple_window are _not_ set.
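For reference, both parameters are read-only at runtime (S_IRUGO), so
they would be set at module load time. A hypothetical example, assuming
the kvm-intel module on a VMX host (values shown are the patch defaults):

# Disable PLE entirely; ple_gap=0 clears the VM-execution control:
modprobe kvm-intel ple_gap=0

# Or load with explicit (default) values:
modprobe kvm-intel ple_gap=41 ple_window=4096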

In general I'm not in favor of adding features at this late stage,
especially since there will be a backported kernel with this feature in
the L+1 release.

These, however, appear benign while providing some performance benefit,
so ACK from me. Please remember to add the upstream commit IDs.

rtg
--
Tim Gardner tim.gardner at canonical.com
Andy Whitcroft
2010-03-08 10:35:57 UTC
Applied to Lucid.

-apw
