// Source: photoncloud-monorepo/plasmavmc/crates/plasmavmc-kvm/src/lib.rs

//! KVM/QEMU hypervisor backend for PlasmaVMC
//!
//! This crate provides the KVM backend implementation for the HypervisorBackend trait.
//! It uses QEMU with KVM acceleration to run virtual machines.
mod env;
mod qmp;
use async_trait::async_trait;
use env::{
resolve_kernel_initrd, resolve_nbd_aio_mode, resolve_nbd_max_queues, resolve_qcow2_path, resolve_qemu_path,
resolve_qmp_timeout_secs, resolve_runtime_dir, ENV_QCOW2_PATH,
};
use plasmavmc_hypervisor::{BackendCapabilities, HypervisorBackend, UnsupportedReason};
use plasmavmc_types::{
AttachedDisk, DiskAttachment, DiskBus, DiskCache, Error, HypervisorType, NetworkSpec,
NicModel, Result, VirtualMachine, VmHandle, VmSpec, VmState, VmStatus, VolumeFormat,
};
use qmp::QmpClient;
use serde_json::{json, Value};
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use tokio::{net::UnixStream, time::Instant};
/// KVM/QEMU hypervisor backend
///
/// Implements `HypervisorBackend` by spawning a QEMU process per VM and
/// controlling it over a per-VM QMP unix socket kept under `runtime_dir`.
pub struct KvmBackend {
    /// Path to QEMU binary (passed through `resolve_qemu_path` before spawning,
    /// so the environment may substitute a different binary)
    qemu_path: PathBuf,
    /// Runtime directory for VM state; each VM gets a `<runtime_dir>/<vm_id>/`
    /// subdirectory holding its QMP socket and console log
    runtime_dir: PathBuf,
}
impl KvmBackend {
    /// Construct a backend with an explicit QEMU binary and runtime directory.
    pub fn new(qemu_path: impl Into<PathBuf>, runtime_dir: impl Into<PathBuf>) -> Self {
        let qemu_path = qemu_path.into();
        let runtime_dir = runtime_dir.into();
        Self {
            qemu_path,
            runtime_dir,
        }
    }

    /// Construct a backend with the stock QEMU path and the env-resolved runtime dir.
    pub fn with_defaults() -> Self {
        Self::new("/usr/bin/qemu-system-x86_64", resolve_runtime_dir())
    }

    /// Resolve the QMP socket path for a VM, preferring the path recorded in
    /// the handle's backend state and falling back to `<runtime_dir>/qmp.sock`.
    fn qmp_socket_path(&self, handle: &VmHandle) -> PathBuf {
        match handle.backend_state.get("qmp_socket") {
            Some(path) => PathBuf::from(path),
            None => PathBuf::from(&handle.runtime_dir).join("qmp.sock"),
        }
    }
}
/// Map a volume format to the string QEMU expects in `format=` arguments.
fn volume_format_name(format: VolumeFormat) -> &'static str {
    match format {
        VolumeFormat::Qcow2 => "qcow2",
        VolumeFormat::Raw => "raw",
    }
}
/// Build a QEMU `rbd:` URI: `rbd:<pool>/<image>[:id=<user>][:mon_host=<m1;m2>]`.
/// Empty `user` and empty `monitors` each omit their segment.
fn build_rbd_uri(pool: &str, image: &str, monitors: &[String], user: &str) -> String {
    let id_part = if user.is_empty() {
        String::new()
    } else {
        format!(":id={user}")
    };
    let mon_part = if monitors.is_empty() {
        String::new()
    } else {
        format!(":mon_host={}", monitors.join(";"))
    };
    format!("rbd:{pool}/{image}{id_part}{mon_part}")
}
/// Resolve a disk attachment into its QEMU source string plus format name.
fn disk_source_arg(disk: &AttachedDisk) -> Result<(String, &'static str)> {
    let pair = match &disk.attachment {
        DiskAttachment::File { path, format } => (path.clone(), volume_format_name(*format)),
        DiskAttachment::Nbd { uri, format } => (uri.clone(), volume_format_name(*format)),
        DiskAttachment::CephRbd {
            pool,
            image,
            monitors,
            user,
            ..
        } => {
            // RBD attachments are always presented to QEMU as raw block devices.
            (build_rbd_uri(pool, image, monitors, user), "raw")
        }
    };
    Ok(pair)
}
/// Compute the cache mode actually handed to QEMU for a disk.
///
/// Shared NBD-backed volumes perform better and behave more predictably
/// with direct I/O than with host-side writeback caching, so writeback is
/// coerced to `None` for them; every other combination is passed through.
fn effective_disk_cache(disk: &AttachedDisk) -> DiskCache {
    let nbd_writeback = matches!(disk.attachment, DiskAttachment::Nbd { .. })
        && matches!(disk.cache, DiskCache::Writeback);
    if nbd_writeback {
        DiskCache::None
    } else {
        disk.cache
    }
}
/// QEMU `cache=` value for a cache policy.
fn disk_cache_mode(cache: DiskCache) -> &'static str {
    match cache {
        DiskCache::Writeback => "writeback",
        DiskCache::Writethrough => "writethrough",
        DiskCache::None => "none",
    }
}
/// Select the QEMU `aio=` mode for a disk, if one should be set at all.
///
/// Local files use native AIO only under direct I/O (`cache=none`) and fall
/// back to the thread pool otherwise; NBD uses an environment-resolved mode;
/// Ceph RBD gets no explicit mode (QEMU's rbd driver chooses).
fn disk_aio_mode(disk: &AttachedDisk) -> Option<&'static str> {
    match &disk.attachment {
        DiskAttachment::File { .. } => {
            if matches!(disk.cache, DiskCache::None) {
                Some("native")
            } else {
                Some("threads")
            }
        }
        DiskAttachment::Nbd { .. } => Some(resolve_nbd_aio_mode()),
        DiskAttachment::CephRbd { .. } => None,
    }
}
/// True when a disk warrants its own QEMU iothread: NBD-backed volumes on the
/// virtio bus (the only combination that gets `-object iothread,...`).
fn disk_uses_dedicated_iothread(disk: &AttachedDisk) -> bool {
    matches!(disk.attachment, DiskAttachment::Nbd { .. })
        && matches!(disk.bus, DiskBus::Virtio)
}
/// Number of virtio queues for a disk: the VM's vCPU count clamped to
/// `[1, NBD max-queues]` for disks with a dedicated iothread, otherwise 1.
fn disk_queue_count(vm: &VirtualMachine, disk: &AttachedDisk) -> u16 {
    if !disk_uses_dedicated_iothread(disk) {
        return 1;
    }
    // `.max(1)` guarantees the upper clamp bound is never below the lower one.
    let cap = resolve_nbd_max_queues().max(1) as u32;
    vm.spec.cpu.vcpus.clamp(1, cap) as u16
}
/// Sanitize an arbitrary disk id into a QEMU-safe device-id component:
/// non-alphanumeric ASCII becomes `-`; an empty result falls back to
/// `disk-<fallback_index>`.
fn sanitize_device_component(value: &str, fallback_index: usize) -> String {
    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        sanitized.push(if ch.is_ascii_alphanumeric() { ch } else { '-' });
    }
    if sanitized.is_empty() {
        return format!("disk-{fallback_index}");
    }
    sanitized
}
/// Render a `,bootindex=N` suffix for a device argument; indexes of 0 or
/// `None` yield the empty string (no boot priority assigned).
fn bootindex_suffix(boot_index: Option<u32>) -> String {
    match boot_index {
        Some(index) if index > 0 => format!(",bootindex={index}"),
        _ => String::new(),
    }
}
/// QMP readiness timeout, resolved from the environment on each call.
fn qmp_timeout() -> Duration {
    let secs = resolve_qmp_timeout_secs();
    Duration::from_secs(secs)
}
/// Build the QEMU `-drive`/`-device` (plus controller/iothread) arguments for
/// a VM's disks.
///
/// When neither attached disks nor spec disks exist, falls back to a single
/// env-resolved qcow2 image which must already be materialized on disk.
/// Otherwise disks are emitted in a deterministic order — by boot index
/// (unset sorts last), then by id — so device enumeration is stable across
/// launches.
///
/// # Errors
/// Returns `HypervisorError` when the fallback image is unset/missing or
/// when more than 6 IDE/SATA disks are requested (single AHCI controller).
fn build_disk_args(vm: &VirtualMachine, disks: &[AttachedDisk]) -> Result<Vec<String>> {
    if disks.is_empty() && vm.spec.disks.is_empty() {
        // Legacy single-disk path: image location comes from the environment.
        let qcow_path = resolve_qcow2_path().ok_or_else(|| {
            Error::HypervisorError(format!(
                "{ENV_QCOW2_PATH} not set; provide qcow2 image to spawn VM"
            ))
        })?;
        if !qcow_path.exists() {
            return Err(Error::HypervisorError(format!(
                "Primary disk is not materialized at {}",
                qcow_path.display()
            )));
        }
        return Ok(vec![
            "-drive".into(),
            format!("file={},if=virtio,format=qcow2", qcow_path.display()),
        ]);
    }
    let mut args = Vec::new();
    // Decide controller presence from the disks we actually emit devices for.
    // (Previously derived from vm.spec.disks, which can drift from `disks`
    // and leave a scsi-hd/ide-hd device referencing a missing controller.)
    let has_scsi = disks.iter().any(|disk| matches!(disk.bus, DiskBus::Scsi));
    let has_ahci = disks
        .iter()
        .any(|disk| matches!(disk.bus, DiskBus::Ide | DiskBus::Sata));
    if has_scsi {
        args.push("-device".into());
        args.push("virtio-scsi-pci,id=scsi0".into());
    }
    if has_ahci {
        args.push("-device".into());
        args.push("ich9-ahci,id=ahci0".into());
    }
    // Stable ordering: boot_index ascending (None last), ties broken by id.
    let mut disks: Vec<&AttachedDisk> = disks.iter().collect();
    disks.sort_by(|lhs, rhs| {
        lhs.boot_index
            .unwrap_or(u32::MAX)
            .cmp(&rhs.boot_index.unwrap_or(u32::MAX))
            .then_with(|| lhs.id.cmp(&rhs.id))
    });
    let mut scsi_slot = 0usize;
    let mut ahci_slot = 0usize;
    for (index, disk) in disks.into_iter().enumerate() {
        let disk_id = sanitize_device_component(&disk.id, index);
        let (source, format_name) = disk_source_arg(disk)?;
        if disk_uses_dedicated_iothread(disk) {
            // NBD + virtio disks get a private iothread to keep their I/O
            // off the QEMU main loop.
            args.push("-object".into());
            args.push(format!("iothread,id=iothread-{disk_id}"));
        }
        let effective_cache = effective_disk_cache(disk);
        let mut drive_arg = format!(
            "file={source},if=none,format={format_name},id=drive-{disk_id},cache={}",
            disk_cache_mode(effective_cache)
        );
        if let Some(aio_mode) = disk_aio_mode(disk) {
            drive_arg.push_str(&format!(",aio={aio_mode}"));
        }
        args.push("-drive".into());
        args.push(drive_arg);
        let bootindex = bootindex_suffix(disk.boot_index);
        let device_arg = match disk.bus {
            DiskBus::Virtio => {
                let mut device_arg =
                    format!("virtio-blk-pci,drive=drive-{disk_id},id=disk-{disk_id}");
                if disk_uses_dedicated_iothread(disk) {
                    let queues = disk_queue_count(vm, disk);
                    device_arg.push_str(&format!(
                        ",iothread=iothread-{disk_id},num-queues={queues},queue-size=1024"
                    ));
                }
                device_arg.push_str(&bootindex);
                device_arg
            }
            DiskBus::Scsi => {
                let slot = scsi_slot;
                scsi_slot += 1;
                format!(
                    "scsi-hd,drive=drive-{disk_id},id=disk-{disk_id},bus=scsi0.0,channel=0,scsi-id={slot},lun=0{bootindex}"
                )
            }
            DiskBus::Ide | DiskBus::Sata => {
                // A single AHCI controller exposes 6 ports (ahci0.0–ahci0.5).
                if ahci_slot >= 6 {
                    return Err(Error::HypervisorError(
                        "Too many IDE/SATA disks for a single AHCI controller".into(),
                    ));
                }
                let slot = ahci_slot;
                ahci_slot += 1;
                format!(
                    "ide-hd,drive=drive-{disk_id},id=disk-{disk_id},bus=ahci0.{slot}{bootindex}"
                )
            }
        };
        args.push("-device".into());
        args.push(device_arg);
    }
    Ok(args)
}
/// Build a minimal QEMU argument list for paused launch with QMP socket.
///
/// The VM boots frozen (`-S`) so the caller can finish setup and resume it
/// via QMP `cont`. Graphics and the human monitor are disabled; control goes
/// through the QMP unix socket and serial output is captured to
/// `console_log`. When `kernel` is provided the VM direct-boots it (with
/// optional `initrd`) and the kernel command line is set to `console=ttyS0`
/// so guest output lands in the serial log; `initrd` is ignored without a
/// kernel.
///
/// CPU model falls back to `host` when the spec does not pin one.
fn build_qemu_args(
    vm: &VirtualMachine,
    disks: &[AttachedDisk],
    qmp_socket: &Path,
    console_log: &Path,
    kernel: Option<&Path>,
    initrd: Option<&Path>,
) -> Result<Vec<String>> {
    let mut args = vec![
        "-machine".into(),
        "q35,accel=kvm".into(),
        "-name".into(),
        vm.name.clone(),
        "-m".into(),
        vm.spec.memory.size_mib.to_string(),
        "-smp".into(),
        vm.spec.cpu.vcpus.to_string(),
        "-cpu".into(),
        vm.spec
            .cpu
            .cpu_model
            .clone()
            .unwrap_or_else(|| "host".into()),
        "-enable-kvm".into(),
        "-nographic".into(),
        "-display".into(),
        "none".into(),
        "-monitor".into(),
        "none".into(),
        // QMP server on a unix socket; wait=off so QEMU starts without a client.
        "-qmp".into(),
        format!("unix:{},server=on,wait=off", qmp_socket.display()),
        "-serial".into(),
        format!("file:{}", console_log.display()),
        // Start with vCPUs paused; resumed later via QMP `cont`.
        "-S".into(),
    ];
    args.extend(build_disk_args(vm, disks)?);
    if let Some(kernel) = kernel {
        args.push("-kernel".into());
        args.push(kernel.display().to_string());
        if let Some(initrd) = initrd {
            args.push("-initrd".into());
            args.push(initrd.display().to_string());
        }
        args.push("-append".into());
        args.push("console=ttyS0".into());
    }
    Ok(args)
}
/// Build QEMU args for an incoming migration listener.
///
/// Reuses the paused-launch argument list, strips the `-S` freeze flag (the
/// migration stream controls when vCPUs start), and appends
/// `-incoming <listen_uri>`.
fn build_qemu_args_incoming(
    vm: &VirtualMachine,
    disks: &[AttachedDisk],
    qmp_socket: &Path,
    console_log: &Path,
    kernel: Option<&Path>,
    initrd: Option<&Path>,
    listen_uri: &str,
) -> Result<Vec<String>> {
    let mut args = build_qemu_args(vm, disks, qmp_socket, console_log, kernel, initrd)?;
    let freeze_flag = args.iter().position(|arg| arg == "-S");
    if let Some(idx) = freeze_flag {
        args.remove(idx);
    }
    args.extend(["-incoming".to_string(), listen_uri.to_string()]);
    Ok(args)
}
/// Wait for the QMP unix socket to accept a connection, polling every 50 ms
/// until `timeout` elapses. The probe connection is dropped immediately —
/// this only proves the socket is live.
async fn wait_for_qmp(qmp_socket: &Path, timeout: Duration) -> Result<()> {
    let started = Instant::now();
    loop {
        let err = match UnixStream::connect(qmp_socket).await {
            Ok(_probe) => return Ok(()),
            Err(err) => err,
        };
        if started.elapsed() >= timeout {
            return Err(Error::HypervisorError(format!(
                "Timed out waiting for QMP socket {}: {err}",
                qmp_socket.display()
            )));
        }
        tokio::time::sleep(Duration::from_millis(50)).await;
    }
}
/// Send SIGKILL to a process via the `kill` utility.
///
/// Treats the kill as successful when either the command reports success or
/// the target process is already gone (races with natural exit are fine).
fn kill_pid(pid: u32) -> Result<()> {
    let pid_arg = pid.to_string();
    let status = std::process::Command::new("kill")
        .args(["-9", pid_arg.as_str()])
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .map_err(|e| Error::HypervisorError(format!("Failed to invoke kill -9: {e}")))?;
    if status.success() || !pid_running(pid) {
        Ok(())
    } else {
        Err(Error::HypervisorError(format!(
            "kill -9 exited with status: {status}"
        )))
    }
}
/// Probe whether a process exists using `kill -0` (signal 0 = existence check).
/// Any failure to run the probe is treated as "not running".
fn pid_running(pid: u32) -> bool {
    let pid_arg = pid.to_string();
    let probe = std::process::Command::new("kill")
        .args(["-0", pid_arg.as_str()])
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    match probe {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
/// Detect whether the VM exited without us stopping it: a known PID that is
/// no longer running, or (with no PID recorded) a missing QMP socket.
fn vm_stopped_out_of_band(handle: &VmHandle, qmp_socket: &Path) -> bool {
    match handle.pid {
        Some(pid) => !pid_running(pid),
        None => !qmp_socket.exists(),
    }
}
/// Default status with the actual state forced to `Stopped`.
fn stopped_status() -> VmStatus {
    let mut status = VmStatus::default();
    status.actual_state = VmState::Stopped;
    status
}
#[async_trait]
impl HypervisorBackend for KvmBackend {
    fn backend_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Static capability set advertised for KVM/QEMU.
    fn capabilities(&self) -> BackendCapabilities {
        BackendCapabilities {
            live_migration: true,
            hot_plug_cpu: true,
            hot_plug_memory: true,
            hot_plug_disk: true,
            hot_plug_nic: true,
            vnc_console: true,
            serial_console: true,
            nested_virtualization: true,
            gpu_passthrough: true,
            max_vcpus: 256,
            max_memory_gib: 4096,
            supported_disk_buses: vec![DiskBus::Virtio, DiskBus::Scsi, DiskBus::Ide, DiskBus::Sata],
            supported_nic_models: vec![NicModel::VirtioNet, NicModel::E1000],
        }
    }

    fn supports(&self, _spec: &VmSpec) -> std::result::Result<(), UnsupportedReason> {
        // KVM supports all features, so no limitations
        Ok(())
    }

    /// Prepare the per-VM runtime dir, spawn QEMU paused (`-S`) with a QMP
    /// socket, and wait for the socket to accept connections before detaching
    /// the child. On QMP timeout the spawned process is killed and reaped so
    /// no orphan is left behind.
    async fn create(&self, vm: &VirtualMachine, disks: &[AttachedDisk]) -> Result<VmHandle> {
        tracing::info!(
            vm_id = %vm.id,
            name = %vm.name,
            "Creating VM (runtime prep + spawn)"
        );
        let runtime_dir = self.runtime_dir.join(vm.id.to_string());
        tokio::fs::create_dir_all(&runtime_dir)
            .await
            .map_err(|e| Error::HypervisorError(format!("Failed to create runtime dir: {e}")))?;
        let qmp_socket = runtime_dir.join("qmp.sock");
        let console_log = runtime_dir.join("console.log");
        // Remove stale socket if it exists from a previous run.
        let _ = tokio::fs::remove_file(&qmp_socket).await;
        let _ = tokio::fs::remove_file(&console_log).await;
        let qemu_bin = resolve_qemu_path(&self.qemu_path);
        let (kernel_path, initrd_path) = resolve_kernel_initrd();
        let args = build_qemu_args(
            vm,
            disks,
            &qmp_socket,
            &console_log,
            kernel_path.as_deref(),
            initrd_path.as_deref(),
        )?;
        let mut cmd = Command::new(&qemu_bin);
        cmd.args(&args);
        tracing::debug!(
            vm_id = %vm.id,
            qemu_bin = %qemu_bin.display(),
            runtime_dir = %runtime_dir.display(),
            qmp_socket = %qmp_socket.display(),
            ?args,
            "Spawning KVM QEMU"
        );
        let mut child = cmd
            .spawn()
            .map_err(|e| Error::HypervisorError(format!("Failed to spawn QEMU: {e}")))?;
        // (was `child.id().map(|p| p)` — the map was a no-op)
        let pid = child.id();
        // Wait for QMP readiness before detaching so slow nested workers do not leave orphans.
        if let Err(err) = wait_for_qmp(&qmp_socket, qmp_timeout()).await {
            tracing::warn!(
                vm_id = %vm.id,
                qmp_socket = %qmp_socket.display(),
                ?pid,
                error = %err,
                "QMP socket did not become ready; cleaning up spawned QEMU"
            );
            let _ = child.start_kill();
            let _ = child.wait().await;
            let _ = tokio::fs::remove_file(&qmp_socket).await;
            return Err(err);
        }
        // Detach process; lifecycle managed via QMP/kill later.
        tokio::spawn(async move {
            let _ = child.wait().await;
        });
        let mut handle = VmHandle::new(vm.id, runtime_dir.to_string_lossy().to_string());
        handle
            .backend_state
            .insert("qmp_socket".into(), qmp_socket.display().to_string());
        handle
            .backend_state
            .insert("console_log".into(), console_log.display().to_string());
        handle.pid = pid;
        handle.attached_disks = disks.to_vec();
        Ok(handle)
    }

    /// Resume a paused VM by issuing QMP `cont`.
    async fn start(&self, handle: &VmHandle) -> Result<()> {
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        tracing::info!(
            vm_id = %handle.vm_id,
            qmp_socket = %qmp_socket.display(),
            "Starting VM via QMP cont"
        );
        let mut client = QmpClient::connect(&qmp_socket).await?;
        client.command::<Value>("cont", None::<Value>).await?;
        Ok(())
    }

    /// Graceful stop: QMP `system_powerdown`, then poll until the VM is gone,
    /// escalating to SIGKILL when `timeout` expires and a PID is known.
    /// A VM that exits out-of-band at any point is treated as stopped.
    async fn stop(&self, handle: &VmHandle, timeout: Duration) -> Result<()> {
        let qmp_socket = self.qmp_socket_path(handle);
        if let Err(e) = wait_for_qmp(&qmp_socket, qmp_timeout()).await {
            if vm_stopped_out_of_band(handle, &qmp_socket) {
                tracing::info!(vm_id = %handle.vm_id, "VM already stopped before QMP stop");
                return Ok(());
            }
            if let Some(pid) = handle.pid {
                tracing::warn!(vm_id = %handle.vm_id, pid, "QMP unavailable; sending SIGKILL");
                return kill_pid(pid);
            }
            return Err(e);
        }
        tracing::info!(
            vm_id = %handle.vm_id,
            timeout_secs = timeout.as_secs(),
            qmp_socket = %qmp_socket.display(),
            "Stopping VM via QMP system_powerdown"
        );
        let mut client = QmpClient::connect(&qmp_socket).await?;
        if let Err(e) = client
            .command::<Value>("system_powerdown", None::<Value>)
            .await
        {
            if vm_stopped_out_of_band(handle, &qmp_socket) {
                tracing::info!(
                    vm_id = %handle.vm_id,
                    error = %e,
                    "VM exited while handling system_powerdown; treating stop as successful"
                );
                return Ok(());
            }
            tracing::warn!(
                vm_id = %handle.vm_id,
                error = %e,
                "QMP powerdown command raced with shutdown; waiting for VM to stop"
            );
        }
        let start = Instant::now();
        loop {
            if vm_stopped_out_of_band(handle, &qmp_socket) {
                break;
            }
            match QmpClient::connect(&qmp_socket).await {
                Ok(mut client) => match client.query_status().await {
                    Ok(status) if matches!(status.actual_state, VmState::Stopped | VmState::Failed) => {
                        break;
                    }
                    Ok(_) => {}
                    // Query failures are expected when the VM exits mid-poll.
                    Err(_) if vm_stopped_out_of_band(handle, &qmp_socket) => break,
                    Err(e) => {
                        tracing::debug!(
                            vm_id = %handle.vm_id,
                            error = %e,
                            "QMP query failed while waiting for shutdown"
                        );
                    }
                },
                Err(_) if vm_stopped_out_of_band(handle, &qmp_socket) => break,
                Err(e) => {
                    tracing::debug!(
                        vm_id = %handle.vm_id,
                        error = %e,
                        "QMP reconnect failed while waiting for shutdown"
                    );
                }
            }
            if start.elapsed() >= timeout {
                if let Some(pid) = handle.pid {
                    tracing::warn!(vm_id = %handle.vm_id, pid, "Stop timed out; sending SIGKILL");
                    kill_pid(pid)?;
                    break;
                }
                return Err(Error::HypervisorError(format!(
                    "Timeout waiting for VM {} to stop",
                    handle.vm_id
                )));
            }
            tokio::time::sleep(Duration::from_millis(100)).await;
        }
        Ok(())
    }

    /// Hard stop: QMP `quit`, falling back to SIGKILL when QMP is unusable
    /// and a PID is recorded.
    async fn kill(&self, handle: &VmHandle) -> Result<()> {
        tracing::info!(vm_id = %handle.vm_id, "Force killing VM via QMP quit");
        let qmp_socket = self.qmp_socket_path(handle);
        match wait_for_qmp(&qmp_socket, qmp_timeout()).await {
            Ok(_) => {
                let mut client = QmpClient::connect(&qmp_socket).await?;
                if let Err(e) = client.command::<Value>("quit", None::<Value>).await {
                    tracing::warn!(vm_id = %handle.vm_id, error = %e, "QMP quit failed; attempting SIGKILL");
                    if let Some(pid) = handle.pid {
                        return kill_pid(pid);
                    }
                    return Err(e);
                }
            }
            Err(e) => {
                if let Some(pid) = handle.pid {
                    tracing::warn!(vm_id = %handle.vm_id, pid, "QMP unavailable; attempting SIGKILL");
                    return kill_pid(pid);
                }
                return Err(e);
            }
        }
        Ok(())
    }

    /// Reset the guest via QMP `system_reset` (hard reboot, no guest ACPI).
    async fn reboot(&self, handle: &VmHandle) -> Result<()> {
        tracing::info!(vm_id = %handle.vm_id, "Rebooting VM via QMP system_reset");
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        client
            .command::<Value>("system_reset", None::<Value>)
            .await?;
        Ok(())
    }

    /// Spawn a destination-side QEMU with `-incoming <listen_uri>` and wait
    /// for its QMP socket, cleaning up the process on readiness timeout.
    async fn prepare_incoming(
        &self,
        vm: &VirtualMachine,
        listen_uri: &str,
        disks: &[AttachedDisk],
    ) -> Result<VmHandle> {
        tracing::info!(
            vm_id = %vm.id,
            listen_uri,
            "Preparing incoming migration listener"
        );
        let runtime_dir = self.runtime_dir.join(vm.id.to_string());
        tokio::fs::create_dir_all(&runtime_dir)
            .await
            .map_err(|e| Error::HypervisorError(format!("Failed to create runtime dir: {e}")))?;
        let qmp_socket = runtime_dir.join("qmp.sock");
        let console_log = runtime_dir.join("console.log");
        let _ = tokio::fs::remove_file(&qmp_socket).await;
        let _ = tokio::fs::remove_file(&console_log).await;
        let qemu_bin = resolve_qemu_path(&self.qemu_path);
        let (kernel_path, initrd_path) = resolve_kernel_initrd();
        let args = build_qemu_args_incoming(
            vm,
            disks,
            &qmp_socket,
            &console_log,
            kernel_path.as_deref(),
            initrd_path.as_deref(),
            listen_uri,
        )?;
        let mut cmd = Command::new(&qemu_bin);
        cmd.args(&args);
        tracing::debug!(
            vm_id = %vm.id,
            qemu_bin = %qemu_bin.display(),
            runtime_dir = %runtime_dir.display(),
            qmp_socket = %qmp_socket.display(),
            ?args,
            "Spawning QEMU for incoming migration"
        );
        let mut child = cmd
            .spawn()
            .map_err(|e| Error::HypervisorError(format!("Failed to spawn QEMU: {e}")))?;
        // (was `child.id().map(|p| p)` — the map was a no-op)
        let pid = child.id();
        if let Err(err) = wait_for_qmp(&qmp_socket, qmp_timeout()).await {
            tracing::warn!(
                vm_id = %vm.id,
                qmp_socket = %qmp_socket.display(),
                ?pid,
                error = %err,
                "Incoming migration QMP socket did not become ready; cleaning up spawned QEMU"
            );
            let _ = child.start_kill();
            let _ = child.wait().await;
            let _ = tokio::fs::remove_file(&qmp_socket).await;
            return Err(err);
        }
        tokio::spawn(async move {
            let _ = child.wait().await;
        });
        let mut handle = VmHandle::new(vm.id, runtime_dir.to_string_lossy().to_string());
        handle
            .backend_state
            .insert("qmp_socket".into(), qmp_socket.display().to_string());
        handle
            .backend_state
            .insert("console_log".into(), console_log.display().to_string());
        handle.pid = pid;
        handle.attached_disks = disks.to_vec();
        Ok(handle)
    }

    /// Kick off live migration via QMP `migrate`; when `wait` is set, poll
    /// `query-migrate` every 200 ms until completed/failed or `timeout`.
    async fn migrate(
        &self,
        handle: &VmHandle,
        destination_uri: &str,
        timeout: Duration,
        wait: bool,
    ) -> Result<()> {
        tracing::info!(
            vm_id = %handle.vm_id,
            destination_uri,
            wait,
            "Initiating live migration via QMP"
        );
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        client
            .command("migrate", Some(json!({ "uri": destination_uri })))
            .await?;
        if !wait {
            return Ok(());
        }
        let start = Instant::now();
        loop {
            let resp = client
                .command::<Value>("query-migrate", None::<Value>)
                .await?;
            let status = resp
                .get("status")
                .and_then(Value::as_str)
                .unwrap_or("unknown");
            match status {
                "completed" => return Ok(()),
                "failed" | "cancelled" => {
                    let err = resp
                        .get("error")
                        .and_then(Value::as_str)
                        .unwrap_or("migration failed");
                    return Err(Error::HypervisorError(format!("Migration failed: {err}")));
                }
                _ => {}
            }
            if start.elapsed() >= timeout {
                return Err(Error::HypervisorError(format!(
                    "Timeout waiting for migration of VM {}",
                    handle.vm_id
                )));
            }
            tokio::time::sleep(Duration::from_millis(200)).await;
        }
    }

    /// Tear down a VM: best-effort kill, wait up to 5s for the process to
    /// exit, then remove the runtime directory.
    async fn delete(&self, handle: &VmHandle) -> Result<()> {
        tracing::info!(vm_id = %handle.vm_id, "Deleting VM resources");
        if handle.pid.is_some() || self.qmp_socket_path(handle).exists() {
            // Best-effort: the VM may already be gone.
            let _ = self.kill(handle).await;
        }
        if let Some(pid) = handle.pid {
            let deadline = Instant::now() + Duration::from_secs(5);
            while pid_running(pid) {
                if Instant::now() >= deadline {
                    return Err(Error::HypervisorError(format!(
                        "Timed out waiting for VM {} process {} to exit",
                        handle.vm_id, pid
                    )));
                }
                tokio::time::sleep(Duration::from_millis(100)).await;
            }
        }
        let runtime_dir = PathBuf::from(&handle.runtime_dir);
        if tokio::fs::try_exists(&runtime_dir)
            .await
            .map_err(|e| Error::HypervisorError(format!("Failed to inspect runtime dir: {e}")))?
        {
            tokio::fs::remove_dir_all(&runtime_dir).await.map_err(|e| {
                Error::HypervisorError(format!("Failed to remove runtime dir: {e}"))
            })?;
        }
        tracing::info!(vm_id = %handle.vm_id, "Deleted VM resources");
        Ok(())
    }

    /// Query VM state via QMP; a failed connect/query on a VM that exited
    /// out-of-band is reported as `Stopped` rather than an error.
    async fn status(&self, handle: &VmHandle) -> Result<VmStatus> {
        let qmp_socket = self.qmp_socket_path(handle);
        tracing::debug!(
            vm_id = %handle.vm_id,
            qmp_socket = %qmp_socket.display(),
            "Querying VM status via QMP"
        );
        match QmpClient::connect(&qmp_socket).await {
            Ok(mut client) => match client.query_status().await {
                Ok(status) => Ok(status),
                Err(_) if vm_stopped_out_of_band(handle, &qmp_socket) => Ok(stopped_status()),
                Err(e) => Err(e),
            },
            Err(_) if vm_stopped_out_of_band(handle, &qmp_socket) => Ok(stopped_status()),
            Err(e) => Err(e),
        }
    }

    /// Hot-plug a file-backed disk: QMP `blockdev-add` then `device_add` of a
    /// virtio-blk-pci frontend. NBD/RBD hot-plug is not implemented.
    async fn attach_disk(&self, handle: &VmHandle, disk: &AttachedDisk) -> Result<()> {
        tracing::info!(
            vm_id = %handle.vm_id,
            disk_id = %disk.id,
            "Attaching disk via QMP device_add"
        );
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        let blockdev_args = match &disk.attachment {
            DiskAttachment::File { path, format } => serde_json::json!({
                "node-name": format!("drive-{}", disk.id),
                "driver": volume_format_name(*format),
                "read-only": disk.read_only,
                "file": {
                    "driver": "file",
                    "filename": path
                }
            }),
            DiskAttachment::Nbd { .. } => {
                return Err(Error::UnsupportedFeature(
                    "KVM hot-plug for NBD-backed disks is not implemented".into(),
                ));
            }
            DiskAttachment::CephRbd { .. } => {
                return Err(Error::UnsupportedFeature(
                    "KVM hot-plug for Ceph RBD-backed disks is not implemented".into(),
                ));
            }
        };
        client.command("blockdev-add", Some(blockdev_args)).await?;
        // Step 2: Add virtio-blk-pci frontend device
        let device_args = serde_json::json!({
            "driver": "virtio-blk-pci",
            "id": format!("disk-{}", disk.id),
            "drive": format!("drive-{}", disk.id)
        });
        client.command("device_add", Some(device_args)).await?;
        tracing::info!(
            vm_id = %handle.vm_id,
            disk_id = %disk.id,
            "Disk attached successfully"
        );
        Ok(())
    }

    /// Hot-unplug a disk frontend via QMP `device_del`.
    async fn detach_disk(&self, handle: &VmHandle, disk_id: &str) -> Result<()> {
        tracing::info!(
            vm_id = %handle.vm_id,
            disk_id = disk_id,
            "Detaching disk via QMP device_del"
        );
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        // Remove the virtio-blk-pci device (backend will be cleaned up automatically)
        let device_args = serde_json::json!({
            "id": format!("disk-{}", disk_id)
        });
        client.command("device_del", Some(device_args)).await?;
        tracing::info!(
            vm_id = %handle.vm_id,
            disk_id = disk_id,
            "Disk detached successfully"
        );
        Ok(())
    }

    /// Hot-plug a tap-backed NIC: QMP `netdev_add` then `device_add` of a
    /// virtio-net-pci frontend.
    async fn attach_nic(&self, handle: &VmHandle, nic: &NetworkSpec) -> Result<()> {
        tracing::info!(
            vm_id = %handle.vm_id,
            nic_id = %nic.id,
            "Attaching NIC via QMP device_add"
        );
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        // Generate MAC address if not provided
        let mac_addr = nic
            .mac_address
            .as_ref()
            .map(|s| s.as_str())
            .unwrap_or_else(|| {
                // Generate a simple MAC (should be more sophisticated in production)
                // NOTE(review): fixed fallback MAC collides when two NICs omit
                // mac_address — derive from nic.id before relying on this path.
                "52:54:00:12:34:56"
            });
        // Step 1: Add network backend via netdev_add
        let netdev_args = serde_json::json!({
            "type": "tap",
            "id": format!("netdev-{}", nic.id),
            "ifname": format!("tap-{}", nic.id),
            "script": "no",
            "downscript": "no"
        });
        client.command("netdev_add", Some(netdev_args)).await?;
        // Step 2: Add virtio-net-pci frontend device
        let device_args = serde_json::json!({
            "driver": "virtio-net-pci",
            "id": format!("net-{}", nic.id),
            "netdev": format!("netdev-{}", nic.id),
            "mac": mac_addr
        });
        client.command("device_add", Some(device_args)).await?;
        tracing::info!(
            vm_id = %handle.vm_id,
            nic_id = %nic.id,
            mac = mac_addr,
            "NIC attached successfully"
        );
        Ok(())
    }

    /// Hot-unplug a NIC frontend via QMP `device_del`.
    async fn detach_nic(&self, handle: &VmHandle, nic_id: &str) -> Result<()> {
        tracing::info!(
            vm_id = %handle.vm_id,
            nic_id = nic_id,
            "Detaching NIC via QMP device_del"
        );
        let qmp_socket = self.qmp_socket_path(handle);
        wait_for_qmp(&qmp_socket, qmp_timeout()).await?;
        let mut client = QmpClient::connect(&qmp_socket).await?;
        // Remove the virtio-net-pci device (netdev backend will be cleaned up automatically)
        let device_args = serde_json::json!({
            "id": format!("net-{}", nic_id)
        });
        client.command("device_del", Some(device_args)).await?;
        tracing::info!(
            vm_id = %handle.vm_id,
            nic_id = nic_id,
            "NIC detached successfully"
        );
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use plasmavmc_types::DiskSpec;
    use tokio::net::UnixListener;

    // Default-constructed backend reports the KVM hypervisor type.
    #[test]
    fn test_kvm_backend_creation() {
        let backend = KvmBackend::with_defaults();
        assert_eq!(backend.backend_type(), HypervisorType::Kvm);
    }

    // Spot-check a few advertised capability flags and the vCPU ceiling.
    #[test]
    fn test_kvm_capabilities() {
        let backend = KvmBackend::with_defaults();
        let caps = backend.capabilities();
        assert!(caps.live_migration);
        assert!(caps.vnc_console);
        assert!(caps.serial_console);
        assert_eq!(caps.max_vcpus, 256);
    }

    // `supports` accepts any spec — KVM has no unsupported features.
    #[test]
    fn test_kvm_supports_all_specs() {
        let backend = KvmBackend::with_defaults();
        let spec = VmSpec::default();
        assert!(backend.supports(&spec).is_ok());
    }

    // Legacy fallback path: no disks anywhere, so the env-provided qcow2
    // image plus QMP socket / memory / console args must all appear.
    // The env lock serializes tests that mutate process environment.
    #[test]
    fn build_qemu_args_contains_qmp_and_memory() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        let vm = VirtualMachine::new("vm1", "org", "proj", VmSpec::default());
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let temp = tempfile::tempdir().unwrap();
        let qcow = temp.path().join("image.qcow2");
        std::fs::write(&qcow, b"image").unwrap();
        std::env::set_var(env::ENV_QCOW2_PATH, &qcow);
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &[], &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("qmp.sock"));
        assert!(args_joined.contains("512")); // default memory MiB
        assert!(args_joined.contains("image.qcow2"));
        assert!(args_joined.contains("console.log"));
        std::env::remove_var(env::ENV_QCOW2_PATH);
    }

    // Two file-backed virtio disks: both images referenced, boot index and
    // per-disk cache/aio combinations rendered as expected.
    #[test]
    fn build_qemu_args_includes_all_materialized_disks() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        let temp = tempfile::tempdir().unwrap();
        let volume_dir = temp.path().join("volumes");
        std::fs::create_dir_all(&volume_dir).unwrap();
        std::fs::write(volume_dir.join("vm-root.qcow2"), b"root").unwrap();
        std::fs::write(volume_dir.join("vm-data.qcow2"), b"data").unwrap();
        let mut spec = VmSpec::default();
        spec.disks = vec![
            DiskSpec {
                id: "root".into(),
                source: plasmavmc_types::DiskSource::Volume {
                    volume_id: "vm-root".into(),
                },
                size_gib: 4,
                bus: DiskBus::Virtio,
                cache: DiskCache::None,
                boot_index: Some(1),
            },
            DiskSpec {
                id: "data".into(),
                source: plasmavmc_types::DiskSource::Volume {
                    volume_id: "vm-data".into(),
                },
                size_gib: 2,
                bus: DiskBus::Virtio,
                cache: DiskCache::Writeback,
                boot_index: None,
            },
        ];
        let vm = VirtualMachine::new("vm1", "org", "proj", spec);
        let disks = vec![
            AttachedDisk {
                id: "root".into(),
                attachment: DiskAttachment::File {
                    path: volume_dir.join("vm-root.qcow2").display().to_string(),
                    format: VolumeFormat::Qcow2,
                },
                bus: DiskBus::Virtio,
                cache: DiskCache::None,
                boot_index: Some(1),
                read_only: false,
            },
            AttachedDisk {
                id: "data".into(),
                attachment: DiskAttachment::File {
                    path: volume_dir.join("vm-data.qcow2").display().to_string(),
                    format: VolumeFormat::Qcow2,
                },
                bus: DiskBus::Virtio,
                cache: DiskCache::Writeback,
                boot_index: None,
                read_only: false,
            },
        ];
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &disks, &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("vm-root.qcow2"));
        assert!(args_joined.contains("vm-data.qcow2"));
        assert!(args_joined.contains("bootindex=1"));
        assert!(args_joined.contains("cache=writeback"));
        // File-backed disks: cache=none pairs with native AIO, otherwise threads.
        assert!(args_joined.contains("cache=none,aio=native"));
        assert!(args_joined.contains("cache=writeback,aio=threads"));
    }

    // NBD + virtio disks get a dedicated iothread and multiqueue frontend
    // (queues clamped to the vCPU count).
    #[test]
    fn build_qemu_args_assigns_iothread_to_nbd_virtio_disks() {
        let mut spec = VmSpec::default();
        spec.cpu.vcpus = 4;
        let vm = VirtualMachine::new("vm1", "org", "proj", spec);
        let disks = vec![AttachedDisk {
            id: "root".into(),
            attachment: DiskAttachment::Nbd {
                uri: "nbd://10.100.0.11:11000".into(),
                format: VolumeFormat::Raw,
            },
            bus: DiskBus::Virtio,
            cache: DiskCache::None,
            boot_index: Some(1),
            read_only: false,
        }];
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &disks, &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("-object iothread,id=iothread-root"));
        assert!(args_joined.contains("virtio-blk-pci,drive=drive-root,id=disk-root,iothread=iothread-root,num-queues=4,queue-size=1024,bootindex=1"));
    }

    // Writeback on an NBD disk is coerced to cache=none (direct I/O).
    #[test]
    fn build_qemu_args_coerces_writeback_cache_to_none_for_nbd_disks() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        std::env::remove_var(crate::env::ENV_NBD_AIO_MODE);
        let vm = VirtualMachine::new("vm1", "org", "proj", VmSpec::default());
        let disks = vec![AttachedDisk {
            id: "root".into(),
            attachment: DiskAttachment::Nbd {
                uri: "nbd://10.100.0.11:11000".into(),
                format: VolumeFormat::Raw,
            },
            bus: DiskBus::Virtio,
            cache: DiskCache::Writeback,
            boot_index: Some(1),
            read_only: false,
        }];
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &disks, &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("cache=none,aio=io_uring"));
    }

    // With no env override, NBD disks default to aio=io_uring.
    #[test]
    fn build_qemu_args_uses_io_uring_for_nbd_none_cache_by_default() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        std::env::remove_var(crate::env::ENV_NBD_AIO_MODE);
        let vm = VirtualMachine::new("vm1", "org", "proj", VmSpec::default());
        let disks = vec![AttachedDisk {
            id: "root".into(),
            attachment: DiskAttachment::Nbd {
                uri: "nbd://10.100.0.11:11000".into(),
                format: VolumeFormat::Raw,
            },
            bus: DiskBus::Virtio,
            cache: DiskCache::None,
            boot_index: Some(1),
            read_only: false,
        }];
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &disks, &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("cache=none,aio=io_uring"));
    }

    // The ENV_NBD_AIO_MODE override takes precedence over the default.
    #[test]
    fn build_qemu_args_honors_nbd_aio_override() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        std::env::set_var(crate::env::ENV_NBD_AIO_MODE, "threads");
        let vm = VirtualMachine::new("vm1", "org", "proj", VmSpec::default());
        let disks = vec![AttachedDisk {
            id: "root".into(),
            attachment: DiskAttachment::Nbd {
                uri: "nbd://10.100.0.11:11000".into(),
                format: VolumeFormat::Raw,
            },
            bus: DiskBus::Virtio,
            cache: DiskCache::None,
            boot_index: Some(1),
            read_only: false,
        }];
        let qmp = PathBuf::from("/tmp/qmp.sock");
        let console = PathBuf::from("/tmp/console.log");
        let args = build_qemu_args(&vm, &disks, &qmp, &console, None, None).unwrap();
        let args_joined = args.join(" ");
        assert!(args_joined.contains("cache=none,aio=threads"));
        std::env::remove_var(crate::env::ENV_NBD_AIO_MODE);
    }

    // wait_for_qmp polls until the socket appears; here it is bound 100 ms
    // after polling starts, well inside the 1 s timeout.
    #[tokio::test]
    async fn wait_for_qmp_succeeds_after_socket_created() {
        let dir = tempfile::tempdir().unwrap();
        let socket_path = dir.path().join("qmp.sock");
        let socket_clone = socket_path.clone();
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(100)).await;
            let _listener = UnixListener::bind(socket_clone).expect("bind socket");
            // Keep listener alive briefly
            tokio::time::sleep(Duration::from_millis(200)).await;
        });
        wait_for_qmp(&socket_path, Duration::from_secs(1))
            .await
            .expect("qmp became ready");
    }

    // Integration smoke: requires env to point to QEMU and a qcow2 image.
    #[tokio::test]
    #[ignore]
    async fn integration_create_start_status_stop() {
        let _guard = crate::env::env_test_lock().lock().unwrap();
        let qemu = std::env::var(env::ENV_QEMU_PATH)
            .unwrap_or_else(|_| "/usr/bin/qemu-system-x86_64".into());
        let qcow = match std::env::var(env::ENV_QCOW2_PATH) {
            Ok(path) => path,
            Err(_) => {
                eprintln!("Skipping integration: {} not set", env::ENV_QCOW2_PATH);
                return;
            }
        };
        if !Path::new(&qemu).exists() || !Path::new(&qcow).exists() {
            eprintln!("Skipping integration: qemu or qcow2 path missing");
            return;
        }
        let backend = KvmBackend::new(qemu, tempfile::tempdir().unwrap().into_path());
        let vm = VirtualMachine::new("int", "org", "proj", VmSpec::default());
        let handle = backend.create(&vm, &[]).await.expect("create vm");
        backend.start(&handle).await.expect("start vm");
        let status = backend.status(&handle).await.expect("status vm");
        assert!(
            matches!(
                status.actual_state,
                VmState::Running | VmState::Stopped | VmState::Error
            ),
            "unexpected state: {:?}",
            status.actual_state
        );
        backend
            .stop(&handle, Duration::from_secs(2))
            .await
            .expect("stop vm");
    }
}