From db6daaa697dbce9ac3fe6596482ccfaa8604d1b7 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 10 Jul 2025 12:39:19 +0000 Subject: [PATCH 01/12] Add pre_backup/post_backup guest api --- guest-agent/src/guest_api_service.rs | 17 +++++++++++++++++ guest-api/proto/guest_api.proto | 6 ++++++ vmm/src/guest_api_service.rs | 8 ++++++++ 3 files changed, 31 insertions(+) diff --git a/guest-agent/src/guest_api_service.rs b/guest-agent/src/guest_api_service.rs index 1cbfeca3..ea7b988a 100644 --- a/guest-agent/src/guest_api_service.rs +++ b/guest-agent/src/guest_api_service.rs @@ -18,6 +18,8 @@ use tracing::error; use crate::{rpc_service::ExternalRpcHandler, AppState}; +const BACKUP_LOCK_FILE: &str = "/run/dstack-backup.lock"; + pub struct GuestApiHandler { state: AppState, } @@ -43,6 +45,7 @@ impl GuestApiRpc for GuestApiHandler { device_id: info.device_id, app_cert: info.app_cert, tcb_info: info.tcb_info, + backup_in_progress: fs::metadata(BACKUP_LOCK_FILE).is_ok(), }) } @@ -112,6 +115,20 @@ impl GuestApiRpc for GuestApiHandler { async fn list_containers(self) -> Result { list_containers().await } + + async fn pre_backup(self) -> Result<()> { + fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(BACKUP_LOCK_FILE) + .context("Failed to create backup lock file, there is another backup in progress")?; + Ok(()) + } + + async fn post_backup(self) -> Result<()> { + fs::remove_file(BACKUP_LOCK_FILE).context("Failed to remove backup lock file")?; + Ok(()) + } } pub(crate) async fn list_containers() -> Result { diff --git a/guest-api/proto/guest_api.proto b/guest-api/proto/guest_api.proto index 0e1d7898..f7f6b5ab 100644 --- a/guest-api/proto/guest_api.proto +++ b/guest-api/proto/guest_api.proto @@ -22,6 +22,8 @@ message GuestInfo { string tcb_info = 5; // Device ID bytes device_id = 6; + // true if backup is in progress + bool backup_in_progress = 7; } message IpAddress { @@ -123,6 +125,8 @@ service GuestApi { rpc NetworkInfo(google.protobuf.Empty) returns 
(NetworkInformation); rpc ListContainers(google.protobuf.Empty) returns (ListContainersResponse); rpc Shutdown(google.protobuf.Empty) returns (google.protobuf.Empty); + rpc PreBackup(google.protobuf.Empty) returns (google.protobuf.Empty); + rpc PostBackup(google.protobuf.Empty) returns (google.protobuf.Empty); } service ProxiedGuestApi { @@ -131,4 +135,6 @@ service ProxiedGuestApi { rpc NetworkInfo(Id) returns (NetworkInformation); rpc ListContainers(Id) returns (ListContainersResponse); rpc Shutdown(Id) returns (google.protobuf.Empty); + rpc PreBackup(Id) returns (google.protobuf.Empty); + rpc PostBackup(Id) returns (google.protobuf.Empty); } diff --git a/vmm/src/guest_api_service.rs b/vmm/src/guest_api_service.rs index 792a937d..4b7e782c 100644 --- a/vmm/src/guest_api_service.rs +++ b/vmm/src/guest_api_service.rs @@ -51,4 +51,12 @@ impl ProxiedGuestApiRpc for GuestApiHandler { async fn shutdown(self, request: Id) -> Result<()> { self.guest_agent_client(&request.id)?.shutdown().await } + + async fn pre_backup(self, request: Id) -> Result<()> { + self.guest_agent_client(&request.id)?.pre_backup().await + } + + async fn post_backup(self, request: Id) -> Result<()> { + self.guest_agent_client(&request.id)?.post_backup().await + } } From b1df1aac54da8d157e5a02ab3d9e93571fbf6554 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 11 Jul 2025 03:57:50 +0000 Subject: [PATCH 02/12] vmm: Add backup api --- vmm/rpc/proto/vmm_rpc.proto | 25 ++++++++ vmm/src/app.rs | 111 +++++++++++++++++++++++++++++++++++- vmm/src/main_service.rs | 11 +++- 3 files changed, 144 insertions(+), 3 deletions(-) diff --git a/vmm/rpc/proto/vmm_rpc.proto b/vmm/rpc/proto/vmm_rpc.proto index ab51519e..edaf8ecf 100644 --- a/vmm/rpc/proto/vmm_rpc.proto +++ b/vmm/rpc/proto/vmm_rpc.proto @@ -223,6 +223,25 @@ message GpuInfo { bool is_free = 4; } +message BackupDiskRequest { + // vm id + string id = 1; + // full or incremental + string level = 2; +} + +message BackupInfo { + // filename (e.g., 
FULL-1694222400-hd1.img) + string filename = 1; + // size of the backup in bytes + uint64 size = 2; +} + +message ListBackupsResponse { + // list of backups + repeated BackupInfo backups = 1; +} + // Service definition for dstack-vmm service Vmm { // RPC to create a VM @@ -261,4 +280,10 @@ service Vmm { // List GPUs rpc ListGpus(google.protobuf.Empty) returns (ListGpusResponse); + + // Backup a VM data disk + rpc BackupDisk(BackupDiskRequest) returns (google.protobuf.Empty); + + // List backups for a VM + rpc ListBackups(Id) returns (ListBackupsResponse); } diff --git a/vmm/src/app.rs b/vmm/src/app.rs index be464fca..ad8fc648 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -6,7 +6,9 @@ use dstack_kms_rpc::kms_client::KmsClient; use dstack_types::shared_filenames::{ compat_v3, APP_COMPOSE, ENCRYPTED_ENV, INSTANCE_INFO, SYS_CONFIG, USER_CONFIG, }; -use dstack_vmm_rpc::{self as pb, GpuInfo, StatusRequest, StatusResponse, VmConfiguration}; +use dstack_vmm_rpc::{ + self as pb, BackupInfo, GpuInfo, StatusRequest, StatusResponse, VmConfiguration, +}; use fs_err as fs; use guest_api::client::DefaultClient as GuestClient; use id_pool::IdPool; @@ -18,7 +20,7 @@ use std::net::IpAddr; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex, MutexGuard}; use supervisor_client::SupervisorClient; -use tracing::{error, info}; +use tracing::{error, info, warn}; pub use image::{Image, ImageInfo}; pub use qemu::{VmConfig, VmWorkDir}; @@ -647,6 +649,111 @@ impl App { } Ok(()) } + + pub(crate) async fn backup_disk(&self, id: &str, level: &str) -> Result<()> { + let work_dir = self.work_dir(id); + + // Determine backup level based on the backup_type + let backup_level = match level { + "full" => "full", + "incremental" => "inc", + _ => bail!("Invalid backup level: {level}"), + }; + + // Get the VM directory path as a string + let backup_dir = work_dir.path().join("backups"); + let qmp_socket = work_dir.qmp_socket().to_string_lossy().to_string(); + + // Create backup directory if 
it doesn't exist + tokio::fs::create_dir_all(&backup_dir) + .await + .context("Failed to create backup directory")?; + + // Run the qmpbackup command in a blocking thread pool since it takes seconds to complete + tokio::task::spawn_blocking(move || { + let output = std::process::Command::new("qmpbackup") + .arg("--socket") + .arg(qmp_socket) + .arg("backup") + .arg("-i") + .arg("hd1") + .arg("--no-subdir") + .arg("-t") + .arg(&backup_dir) + .arg("-T") + .arg("-l") + .arg(backup_level) + .output(); + + match output { + Ok(output) => { + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + Err(anyhow::anyhow!("qmpbackup command failed: {}", stderr)) + } else { + Ok(()) + } + } + Err(e) => Err(anyhow::anyhow!( + "Failed to execute qmpbackup command: {}", + e + )), + } + }) + .await + .context("Failed to execute backup task")? + } + + pub(crate) async fn list_backups(&self, id: &str) -> Result> { + let work_dir = self.work_dir(id); + let backup_dir = work_dir.path().join("backups"); + + // Create backup directory if it doesn't exist + if !backup_dir.exists() { + return Ok(Vec::new()); + } + + // List backup files in the directory + let mut backups = Vec::new(); + + // Read directory entries in a blocking task + let backup_dir_clone = backup_dir.clone(); + let entries = + std::fs::read_dir(backup_dir_clone).context("Failed to read backup directory")?; + // Process each entry + for entry in entries { + let path = match entry { + Ok(entry) => entry.path(), + Err(e) => { + warn!("Failed to read directory entry: {e:?}"); + continue; + } + }; + // Skip if not a file + if !path.is_file() { + continue; + } + + // Get file name + let file_name = match path.file_name().and_then(|n| n.to_str()) { + Some(name) => name.to_string(), + None => continue, + }; + + if !file_name.ends_with(".img") { + continue; + } + + backups.push(BackupInfo { + filename: file_name, + size: path + .metadata() + .context("Failed to get file metadata")? 
+ .len(), + }); + } + Ok(backups) + } } fn paginate(items: Vec, page: u32, page_size: u32) -> impl Iterator { diff --git a/vmm/src/main_service.rs b/vmm/src/main_service.rs index 3c117d30..f822b675 100644 --- a/vmm/src/main_service.rs +++ b/vmm/src/main_service.rs @@ -3,8 +3,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, bail, Context, Result}; use dstack_types::AppCompose; -use dstack_vmm_rpc as rpc; use dstack_vmm_rpc::vmm_server::{VmmRpc, VmmServer}; +use dstack_vmm_rpc::{self as rpc, BackupDiskRequest}; use dstack_vmm_rpc::{ AppId, ComposeHash as RpcComposeHash, GatewaySettings, GetInfoResponse, GetMetaResponse, Id, ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, ListGpusResponse, PublicKeyResponse, @@ -456,6 +456,15 @@ impl VmmRpc for RpcHandler { let hash = hex_sha256(&request.compose_file); Ok(RpcComposeHash { hash }) } + + async fn backup_disk(self, request: BackupDiskRequest) -> Result<()> { + self.app.backup_disk(&request.id, &request.level).await + } + + async fn list_backups(self, request: Id) -> Result { + let backups = self.app.list_backups(&request.id).await?; + Ok(rpc::ListBackupsResponse { backups }) + } } impl RpcCall for RpcHandler { From 5b60e199acc81676724bc2717ac1f483c475f271 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sun, 13 Jul 2025 00:37:12 +0000 Subject: [PATCH 03/12] vmm: Implement backup deletion and restore --- Cargo.lock | 2 + certbot/cli/src/main.rs | 2 +- certbot/src/workdir.rs | 12 +- gateway/src/config.rs | 2 +- vmm/Cargo.toml | 2 + vmm/rpc/proto/vmm_rpc.proto | 32 ++- vmm/src/app.rs | 242 ++++++++++++++++++----- vmm/src/config.rs | 25 ++- vmm/src/console.html | 377 +++++++++++++++++++++++++++++++++++- vmm/src/main_service.rs | 16 +- vmm/vmm.toml | 7 +- 11 files changed, 642 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b23f9de3..4303a9a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2249,6 +2249,7 @@ dependencies = [ "anyhow", "base64", "bon", + "chrono", "clap", 
"dirs", "dstack-kms-rpc", @@ -2272,6 +2273,7 @@ dependencies = [ "rocket-vsock-listener", "safe-write", "serde", + "serde-duration", "serde-human-bytes", "serde_json", "sha2", diff --git a/certbot/cli/src/main.rs b/certbot/cli/src/main.rs index de44ef0c..11d7a017 100644 --- a/certbot/cli/src/main.rs +++ b/certbot/cli/src/main.rs @@ -121,7 +121,7 @@ fn load_config(config: &PathBuf) -> Result { let renew_timeout = Duration::from_secs(config.renew_timeout); let bot_config = CertBotConfig::builder() .acme_url(config.acme_url) - .cert_dir(workdir.backup_dir()) + .cert_dir(workdir.cert_backup_dir()) .cert_file(workdir.cert_path()) .key_file(workdir.key_path()) .auto_create_account(true) diff --git a/certbot/src/workdir.rs b/certbot/src/workdir.rs index 95dff248..4ca4cd5d 100644 --- a/certbot/src/workdir.rs +++ b/certbot/src/workdir.rs @@ -27,24 +27,24 @@ impl WorkDir { self.workdir.join("credentials.json") } - pub fn backup_dir(&self) -> PathBuf { + pub fn cert_backup_dir(&self) -> PathBuf { self.workdir.join("backup") } - pub fn live_dir(&self) -> PathBuf { + pub fn cert_live_dir(&self) -> PathBuf { self.workdir.join("live") } pub fn cert_path(&self) -> PathBuf { - self.live_dir().join("cert.pem") + self.cert_live_dir().join("cert.pem") } pub fn key_path(&self) -> PathBuf { - self.live_dir().join("key.pem") + self.cert_live_dir().join("key.pem") } pub fn list_certs(&self) -> Result> { - crate::bot::list_certs(self.backup_dir()) + crate::bot::list_certs(self.cert_backup_dir()) } pub fn acme_account_uri(&self) -> Result { @@ -58,6 +58,6 @@ impl WorkDir { } pub fn list_cert_public_keys(&self) -> Result>> { - crate::bot::list_cert_public_keys(self.backup_dir()) + crate::bot::list_cert_public_keys(self.cert_backup_dir()) } } diff --git a/gateway/src/config.rs b/gateway/src/config.rs index 07f1c432..acfee181 100644 --- a/gateway/src/config.rs +++ b/gateway/src/config.rs @@ -210,7 +210,7 @@ impl CertbotConfig { let workdir = certbot::WorkDir::new(&self.workdir); 
certbot::CertBotConfig::builder() .auto_create_account(true) - .cert_dir(workdir.backup_dir()) + .cert_dir(workdir.cert_backup_dir()) .cert_file(workdir.cert_path()) .key_file(workdir.key_path()) .credentials_file(workdir.account_credentials_path()) diff --git a/vmm/Cargo.toml b/vmm/Cargo.toml index 6283f494..a085bd98 100644 --- a/vmm/Cargo.toml +++ b/vmm/Cargo.toml @@ -44,6 +44,8 @@ hex_fmt.workspace = true lspci.workspace = true base64.workspace = true serde-human-bytes.workspace = true +serde-duration.workspace = true +chrono.workspace = true [dev-dependencies] insta.workspace = true diff --git a/vmm/rpc/proto/vmm_rpc.proto b/vmm/rpc/proto/vmm_rpc.proto index edaf8ecf..af4d4d91 100644 --- a/vmm/rpc/proto/vmm_rpc.proto +++ b/vmm/rpc/proto/vmm_rpc.proto @@ -225,23 +225,39 @@ message GpuInfo { message BackupDiskRequest { // vm id - string id = 1; + string vm_id = 1; // full or incremental string level = 2; } message BackupInfo { - // filename (e.g., FULL-1694222400-hd1.img) - string filename = 1; + // Group id + string backup_id = 1; + // id of the snapshot + string snapshot_id = 2; + // timestamp + string timestamp = 3; + // level: full or incremental + string level = 4; // size of the backup in bytes - uint64 size = 2; + uint64 size = 5; } message ListBackupsResponse { - // list of backups repeated BackupInfo backups = 1; } +message DeleteBackupRequest { + string vm_id = 1; + string backup_id = 2; +} + +message RestoreBackupRequest { + string vm_id = 1; + string backup_id = 2; + string snapshot_id = 3; +} + // Service definition for dstack-vmm service Vmm { // RPC to create a VM @@ -286,4 +302,10 @@ service Vmm { // List backups for a VM rpc ListBackups(Id) returns (ListBackupsResponse); + + // Delete a backup + rpc DeleteBackup(DeleteBackupRequest) returns (google.protobuf.Empty); + + // Restore a backup + rpc RestoreBackup(RestoreBackupRequest) returns (google.protobuf.Empty); } diff --git a/vmm/src/app.rs b/vmm/src/app.rs index ad8fc648..0f312666 100644 --- 
a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -18,6 +18,7 @@ use serde_json::json; use std::collections::{BTreeSet, HashMap}; use std::net::IpAddr; use std::path::{Path, PathBuf}; +use std::process::Command; use std::sync::{Arc, Mutex, MutexGuard}; use supervisor_client::SupervisorClient; use tracing::{error, info, warn}; @@ -114,6 +115,13 @@ pub struct App { state: Arc>, } +fn validate_filename(s: &str) -> Result<()> { + if s.contains("/") || s.contains("\\") { + bail!("Invalid filename"); + } + Ok(()) +} + impl App { fn lock(&self) -> MutexGuard { self.state.lock().unwrap() @@ -127,6 +135,21 @@ impl App { VmWorkDir::new(self.config.run_path.join(id)) } + fn backups_dir(&self, id: &str) -> PathBuf { + self.config.cvm.backup.path.join(id).join("backups") + } + + fn backup_dir(&self, id: &str, backup_id: &str) -> Result { + validate_filename(backup_id)?; + let backup_dir = self.backups_dir(id).join(backup_id); + Ok(backup_dir) + } + + fn backup_file(&self, id: &str, backup_id: &str, snapshot_id: &str) -> Result { + validate_filename(snapshot_id)?; + Ok(self.backup_dir(id, backup_id)?.join(snapshot_id)) + } + pub fn new(config: Config, supervisor: SupervisorClient) -> Self { let cid_start = config.cvm.cid_start; let cid_end = cid_start.saturating_add(config.cvm.cid_pool_size); @@ -651,7 +674,11 @@ impl App { } pub(crate) async fn backup_disk(&self, id: &str, level: &str) -> Result<()> { + if !self.config.cvm.backup.enabled { + bail!("Backup is not enabled"); + } let work_dir = self.work_dir(id); + let backup_dir = self.backups_dir(id); // Determine backup level based on the backup_type let backup_level = match level { @@ -660,100 +687,217 @@ impl App { _ => bail!("Invalid backup level: {level}"), }; - // Get the VM directory path as a string - let backup_dir = work_dir.path().join("backups"); - let qmp_socket = work_dir.qmp_socket().to_string_lossy().to_string(); + let qmp_socket = work_dir.qmp_socket(); - // Create backup directory if it doesn't exist - 
tokio::fs::create_dir_all(&backup_dir) - .await - .context("Failed to create backup directory")?; - - // Run the qmpbackup command in a blocking thread pool since it takes seconds to complete + let id = id.to_string(); tokio::task::spawn_blocking(move || { - let output = std::process::Command::new("qmpbackup") + let latest_dir = backup_dir.join("latest"); + if backup_level == "full" { + // clear the bitmaps + let output = Command::new("qmpbackup") + .arg("--socket") + .arg(&qmp_socket) + .arg("cleanup") + .arg("--remove-bitmap") + .output() + .context("Failed to clear bitmaps")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + warn!("Failed to clear bitmaps for {id}: {stderr}"); + } + // Switch to new dir and symlink the latest to it + let timestamp = chrono::Utc::now().format("%Y%m%dZ%H%M%S").to_string(); + let new_dir = backup_dir.join(&timestamp); + fs::create_dir_all(&new_dir).context("Failed to create backup directory")?; + if fs::symlink_metadata(&latest_dir).is_ok() { + fs::remove_file(&latest_dir) + .context("Failed to remove latest directory link")?; + } + fs::os::unix::fs::symlink(&timestamp, &latest_dir) + .context("Failed to create latest directory link")?; + } + let output = Command::new("qmpbackup") .arg("--socket") - .arg(qmp_socket) + .arg(&qmp_socket) .arg("backup") .arg("-i") .arg("hd1") .arg("--no-subdir") .arg("-t") - .arg(&backup_dir) - .arg("-T") + .arg(&latest_dir) .arg("-l") .arg(backup_level) - .output(); - - match output { - Ok(output) => { - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - Err(anyhow::anyhow!("qmpbackup command failed: {}", stderr)) - } else { - Ok(()) - } - } - Err(e) => Err(anyhow::anyhow!( - "Failed to execute qmpbackup command: {}", - e - )), + .output() + .context("Failed to execute qmpbackup command")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + warn!("Failed to backup disk for {id}: 
{stderr}"); } + Ok(()) }) .await .context("Failed to execute backup task")? } pub(crate) async fn list_backups(&self, id: &str) -> Result> { - let work_dir = self.work_dir(id); - let backup_dir = work_dir.path().join("backups"); + let backup_dir = self.backups_dir(id); // Create backup directory if it doesn't exist if !backup_dir.exists() { return Ok(Vec::new()); } - // List backup files in the directory + // List backup groups in the directory let mut backups = Vec::new(); // Read directory entries in a blocking task let backup_dir_clone = backup_dir.clone(); - let entries = + let backup_entries = std::fs::read_dir(backup_dir_clone).context("Failed to read backup directory")?; + + fn filename(path: &Path) -> Option { + path.file_name() + .and_then(|n| n.to_str().map(|s| s.to_string())) + } + // Process each entry - for entry in entries { - let path = match entry { + for backup_entry in backup_entries { + let backup_path = match backup_entry { Ok(entry) => entry.path(), Err(e) => { warn!("Failed to read directory entry: {e:?}"); continue; } }; - // Skip if not a file - if !path.is_file() { + if !backup_path.is_dir() { continue; } - - // Get file name - let file_name = match path.file_name().and_then(|n| n.to_str()) { - Some(name) => name.to_string(), - None => continue, - }; - - if !file_name.ends_with(".img") { + if backup_path.ends_with("latest") { continue; } + let backup_id = filename(&backup_path).context("Failed to get group name")?; + let snaps = match std::fs::read_dir(backup_path) { + Ok(entries) => entries, + Err(e) => { + warn!("Failed to read directory entry: {e:?}"); + continue; + } + }; + for snap in snaps { + let snap_path = match snap { + Ok(entry) => entry.path(), + Err(e) => { + warn!("Failed to read directory entry: {e:?}"); + continue; + } + }; + if !snap_path.is_file() { + continue; + } + // Get file name + let snap_filename = filename(&snap_path).context("Failed to get file name")?; - backups.push(BackupInfo { - filename: file_name, - size: 
path + if !snap_filename.ends_with(".img") { + continue; + } + let parts = snap_filename + .split('.') + .next() + .context("Failed to split filename")? + .split('-') + .collect::>(); + let [level, timestamp, _] = parts[..] else { + warn!("Invalid backup filename: {snap_filename}"); + continue; + }; + let size = snap_path .metadata() .context("Failed to get file metadata")? - .len(), - }); + .len(); + backups.push(BackupInfo { + backup_id: backup_id.clone(), + snapshot_id: snap_filename.clone(), + timestamp: timestamp.to_string(), + level: level.to_string(), + size, + }); + } } Ok(backups) } + + pub(crate) async fn delete_backup(&self, vm_id: &str, backup_id: &str) -> Result<()> { + if !self.config.cvm.backup.enabled { + bail!("Backup is not enabled"); + } + let backup_dir = self.backup_dir(vm_id, backup_id)?; + if !backup_dir.exists() { + bail!("Backup does not exist"); + } + if !backup_dir.is_dir() { + bail!("Backup is not a directory"); + } + fs::remove_dir_all(&backup_dir).context("Failed to remove backup directory")?; + Ok(()) + } + + pub(crate) async fn restore_backup( + &self, + vm_id: &str, + backup_id: &str, + snapshot_id: &str, + ) -> Result<()> { + if !self.config.cvm.backup.enabled { + bail!("Backup is not enabled"); + } + // First, ensure the vm is stopped + let info = self.vm_info(vm_id).await?.context("VM not found")?; + if info.status != "stopped" { + bail!("VM is not stopped: status={}", info.status); + } + + let backup_file = self.backup_file(vm_id, backup_id, snapshot_id)?; + if !backup_file.exists() { + bail!("Backup file not found"); + } + let vm_work_dir = self.work_dir(vm_id); + let hda_img = vm_work_dir.hda_path(); + if snapshot_id.starts_with("FULL") { + // Just copy the file + tokio::fs::copy(&backup_file, &hda_img).await?; + } else { + let backup_dir = self.backup_dir(vm_id, backup_id)?; + let snapshot_id = snapshot_id.to_string(); + // Rename the current hda file to *.bak + let bak_file = hda_img.display().to_string() + ".bak"; + 
fs::rename(&hda_img, &bak_file).context("Failed to rename hda file")?; + + tokio::task::spawn_blocking(move || { + /* + qmprestore merge --dir --until --targetfile + */ + let mut command = Command::new("qmprestore"); + command.arg("merge"); + command.arg("--dir").arg(&backup_dir); + command.arg("--until").arg(snapshot_id); + command.arg("--targetfile").arg(&hda_img); + let output = command + .output() + .context("Failed to execute qmprestore command")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + bail!("Failed to restore backup: {stderr}:{stdout}"); + } + Ok(()) + }) + .await + .context("Failed to spawn restore command")? + .context("Failed to restore backup")?; + } + Ok(()) + } } fn paginate(items: Vec, page: u32, page_size: u32) -> impl Iterator { diff --git a/vmm/src/config.rs b/vmm/src/config.rs index 545c97f9..0dd11ef2 100644 --- a/vmm/src/config.rs +++ b/vmm/src/config.rs @@ -130,6 +130,8 @@ pub struct CvmConfig { pub qemu_pci_hole64_size: u64, /// QEMU hotplug_off pub qemu_hotplug_off: bool, + /// Backup configuration + pub backup: BackupConfig, } #[derive(Debug, Clone, Deserialize)] @@ -196,11 +198,15 @@ pub struct GatewayConfig { pub agent_port: u16, } +#[derive(Debug, Clone, Deserialize)] +pub struct BackupConfig { + pub enabled: bool, + pub path: PathBuf, +} + #[derive(Debug, Clone, Deserialize)] pub struct Config { - #[serde(default)] pub image_path: PathBuf, - #[serde(default)] pub run_path: PathBuf, /// The URL of the KMS server pub kms_url: String, @@ -227,12 +233,15 @@ pub struct Config { } impl Config { - pub fn abs_path(self) -> Result { - Ok(Self { - image_path: self.image_path.absolutize()?.to_path_buf(), - run_path: self.run_path.absolutize()?.to_path_buf(), - ..self - }) + pub fn abs_path(mut self) -> Result { + fn absolutize(path: &mut PathBuf) -> Result<()> { + *path = path.absolutize()?.to_path_buf(); + Ok(()) + } + absolutize(&mut 
self.image_path)?; + absolutize(&mut self.run_path)?; + absolutize(&mut self.cvm.backup.path)?; + Ok(self) } } diff --git a/vmm/src/console.html b/vmm/src/console.html index 52f95f00..c1696272 100644 --- a/vmm/src/console.html +++ b/vmm/src/console.html @@ -251,6 +251,124 @@ background: #FF9800; color: white; } + + /* Backup dialog styles */ + .tabs { + display: flex; + border-bottom: 1px solid #ddd; + margin-bottom: 20px; + } + + .tab-btn { + padding: 10px 20px; + background: none; + border: none; + cursor: pointer; + font-size: 14px; + font-weight: 500; + color: #666; + } + + .tab-btn.active { + color: #1976D2; + border-bottom: 2px solid #1976D2; + } + + .tab-content { + padding: 10px 0; + } + + .backup-table { + width: 100%; + border-collapse: collapse; + margin-bottom: 16px; + } + + .backup-table th, + .backup-table td { + padding: 8px; + text-align: left; + border-bottom: 1px solid #ddd; + } + + .backup-list { + max-height: 300px; + overflow-y: auto; + margin-bottom: 16px; + } + + .backup-container { + display: flex; + flex-direction: column; + gap: 20px; + } + + .backup-section { + background-color: #f9f9f9; + padding: 16px; + border-radius: 8px; + border: 1px solid #eee; + } + + .backup-section h4 { + margin-top: 0; + margin-bottom: 16px; + color: #333; + } + + .backup-group { + margin-bottom: 24px; + border: 1px solid #ddd; + border-radius: 6px; + overflow: hidden; + } + + .backup-group-header { + background-color: #f0f0f0; + padding: 10px 16px; + border-bottom: 1px solid #ddd; + } + + .backup-group-header h5 { + margin: 0; + font-size: 14px; + color: #444; + } + + .backup-group-actions { + padding: 10px; + background-color: #f5f5f5; + text-align: right; + } + + .action-buttons { + display: flex; + gap: 8px; + } + + .no-backups { + padding: 20px; + text-align: center; + color: #666; + background-color: #f9f9f9; + border-radius: 6px; + } + + .loading-spinner { + display: inline-block; + width: 16px; + height: 16px; + border: 2px solid rgba(255, 255, 255, 
0.3); + border-radius: 50%; + border-top-color: white; + animation: spin 1s ease-in-out infinite; + } + + @keyframes spin { + to { + transform: rotate(360deg); + } + }