From 8cfe8579d4e02de1b6354237e6a7cae756e61cff Mon Sep 17 00:00:00 2001
From: Nir Soffer
Date: Sat, 14 Sep 2024 05:35:57 +0300
Subject: [PATCH 1/2] Prefer additional network over user network

If using an additional network (e.g. lima:shared), override the DHCP
route metric so that the additional network is preferred. This fixes
issues with k8s components (like submariner) that use the first default
route and break because the user network has no connectivity between
VMs. With this change they always use the additional network.

Example routes with this change when using socket_vmnet:

$ ip route
default via 192.168.105.1 dev lima0 proto dhcp src 192.168.105.7 metric 100
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200
192.168.5.0/24 dev eth0 proto kernel scope link src 192.168.5.15 metric 200
192.168.5.2 dev eth0 proto dhcp scope link src 192.168.5.15 metric 200
192.168.105.0/24 dev lima0 proto kernel scope link src 192.168.105.7 metric 100
192.168.105.1 dev lima0 proto dhcp scope link src 192.168.105.7 metric 100

Example routes without an additional network:

$ ip route
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200
192.168.5.0/24 dev eth0 proto kernel scope link src 192.168.5.15 metric 200
192.168.5.2 dev eth0 proto dhcp scope link src 192.168.5.15 metric 200

Another way to solve this issue is to fix up the metric in a
provisioning script, as done in RamenDR:
https://github.com/RamenDR/ramen/blob/c02119785e734e15511236edd935c04ff71b6646/test/drenv/providers/lima/k8s.yaml#L37

But I think it is better to fix this in Lima, since the current network
configuration is very problematic.

Signed-off-by: Nir Soffer
---
 pkg/cidata/cidata.TEMPLATE.d/network-config | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/cidata/cidata.TEMPLATE.d/network-config b/pkg/cidata/cidata.TEMPLATE.d/network-config
index b8a43683e51..ff291030e0b 100644
--- a/pkg/cidata/cidata.TEMPLATE.d/network-config
+++ b/pkg/cidata/cidata.TEMPLATE.d/network-config
@@ -6,6 +6,12 @@ ethernets:
     macaddress: '{{$nw.MACAddress}}'
     dhcp4: true
     set-name: {{$nw.Interface}}
+    dhcp4-overrides:
+      {{- if (eq $nw.Interface $.SlirpNICName) }}
+      route-metric: 200
+      {{- else }}
+      route-metric: 100
+      {{- end }}
     {{- if and (eq $nw.Interface $.SlirpNICName) (gt (len $.DNSAddresses) 0) }}
     nameservers:
       addresses:

From a1ff7905ad63a9f2366e65745a5cb517db4d472e Mon Sep 17 00:00:00 2001
From: Nir Soffer
Date: Sat, 14 Sep 2024 06:43:20 +0300
Subject: [PATCH 2/2] Add k8s-vmnet example

This example shows how to run multiple connected Kubernetes clusters
using a shared network. You can access all the clusters via the shared
network, and the clusters can access each other.

This configuration was derived from the k8s.yaml template, with the
following changes:

- Ensure that the API server and kubelet listen on the shared network.
  This is important for pods that need to listen on the host network.
  An example is a Rook Ceph test cluster.
- Disable port forwarding, since we access the clusters via the shared
  network, and automatic port forwarding cannot work for multiple
  clusters.
- Since we access the API server via the shared network, don't modify
  the kubeconfig copied to the host.
- Enable parallel image pulls for faster provisioning of complex
  clusters.
- Allow unprivileged pods to access block devices. Required for
  kubevirt virtual machines or replicating block volumes using volsync.
  (The settings behind the last two items are excerpted below.)
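For reference, the last two items correspond to these settings in the
template added by this patch (copied out of context from the full file
below):

  # containerd CRI plugin: pods get access to block devices based on the
  # pod security context instead of requiring privileged containers.
  device_ownership_from_security_context = true

  # kubelet argument set via the kubeadm InitConfiguration: pull images
  # in parallel instead of one at a time.
  serialize-image-pulls: "false"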
These changes were extracted from RamenDR k8s.yaml:
https://github.com/RamenDR/ramen/blob/main/test/drenv/providers/lima/k8s.yaml

Signed-off-by: Nir Soffer
---
 examples/k8s-vmnet.yaml | 205 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 examples/k8s-vmnet.yaml

diff --git a/examples/k8s-vmnet.yaml b/examples/k8s-vmnet.yaml
new file mode 100644
index 00000000000..603baa5d43f
--- /dev/null
+++ b/examples/k8s-vmnet.yaml
@@ -0,0 +1,205 @@
+# Deploy multiple connected Kubernetes clusters via vmnet.
+#
+# Creating the clusters:
+#
+# $ limactl start --name c1 --tty=false template://k8s-vmnet &
+# $ limactl start --name c2 --tty=false template://k8s-vmnet &
+# $ wait
+#
+# Accessing the clusters with limactl:
+#
+# $ limactl shell c1 kubectl get node
+# NAME      STATUS   ROLES                  AGE   VERSION
+# lima-c1   Ready    control-plane,master   44s   v1.22.3
+#
+# Accessing a cluster by exporting the kubeconfig file:
+#
+# $ export KUBECONFIG=$(limactl list c1 --format '{{.Dir}}/copied-from-guest/kubeconfig.yaml')
+#
+# $ kubectl get no
+# NAME      STATUS   ROLES                  AGE   VERSION
+# lima-c1   Ready    control-plane,master   44s   v1.22.3
+
+# This template requires Lima v0.20.0 or later.
+images:
+- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
+  arch: "x86_64"
+- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img"
+  arch: "aarch64"
+
+# Mounts are disabled in this template, but can be enabled optionally.
+mounts: []
+
+containerd:
+  system: true
+  user: false
+
+# Using externally managed socket_vmnet.
+networks:
+- socket: /var/run/socket_vmnet
+
+# Port forwarding is disabled since we access the clusters via the shared network.
+portForwards:
+- ignore: true
+  guestIP: "0.0.0.0"
+
+provision:
+# See
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    command -v kubeadm >/dev/null 2>&1 && exit 0
+    # Install and configure prerequisites
+    cat <<EOF | tee /etc/modules-load.d/k8s.conf
+    overlay
+    br_netfilter
+    EOF
+    modprobe overlay
+    modprobe br_netfilter
+    cat <<EOF | tee /etc/sysctl.d/k8s.conf
+    net.bridge.bridge-nf-call-iptables  = 1
+    net.ipv4.ip_forward                 = 1
+    net.bridge.bridge-nf-call-ip6tables = 1
+    EOF
+    sysctl --system
+
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    grep SystemdCgroup /etc/containerd/config.toml && exit 0
+    grep "version = 2" /etc/containerd/config.toml || exit 1
+    # Configuring the systemd cgroup driver
+    # Overriding the sandbox (pause) image
+    cat <<EOF >>/etc/containerd/config.toml
+    [plugins]
+      [plugins."io.containerd.grpc.v1.cri"]
+        sandbox_image = "$(kubeadm config images list | grep pause | sort -r | head -n1)"
+        device_ownership_from_security_context = true
+        [plugins."io.containerd.grpc.v1.cri".containerd]
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+            [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+              runtime_type = "io.containerd.runc.v2"
+              [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+                SystemdCgroup = true
+    EOF
+    systemctl restart containerd
+
+# See
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    test -e /etc/kubernetes/admin.conf && exit 0
+    export KUBECONFIG=/etc/kubernetes/admin.conf
+    kubeadm config images list
+    kubeadm config images pull --cri-socket=unix:///run/containerd/containerd.sock
+    # Use the shared network, required for components like submariner and pods
+    # that need to be accessible on the shared network.
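+    # The route metric override from the first patch makes the shared network
+    # (e.g. lima0) the lowest-metric default route, so it is listed first by
+    # `ip route show default` and its prefsrc is the VM address on the shared
+    # network.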
+    ADVERTISE_ADDRESS=$(ip -j -4 route show default | jq -r '.[0].prefsrc')
+    # Initializing your control-plane node
+    cat <<EOF >kubeadm-config.yaml
+    kind: InitConfiguration
+    apiVersion: kubeadm.k8s.io/v1beta3
+    nodeRegistration:
+      criSocket: unix:///run/containerd/containerd.sock
+      kubeletExtraArgs:
+        node-ip: $ADVERTISE_ADDRESS
+        serialize-image-pulls: "false"
+    localAPIEndpoint:
+      advertiseAddress: $ADVERTISE_ADDRESS
+    ---
+    kind: ClusterConfiguration
+    apiVersion: kubeadm.k8s.io/v1beta3
+    apiServer:
+      certSANs: # --apiserver-cert-extra-sans
+      - "127.0.0.1"
+    networking:
+      podSubnet: "10.244.0.0/16" # --pod-network-cidr
+    ---
+    kind: KubeletConfiguration
+    apiVersion: kubelet.config.k8s.io/v1beta1
+    cgroupDriver: systemd
+    EOF
+    kubeadm init --config kubeadm-config.yaml
+    # Installing a Pod network add-on
+    kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.24.0/kube-flannel.yml
+    # Control plane node isolation
+    kubectl taint nodes --all node-role.kubernetes.io/control-plane-
+    mkdir -p ${HOME:-/root}/.kube && cp -f $KUBECONFIG ${HOME:-/root}/.kube/config
+
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    export KUBECONFIG=/etc/kubernetes/admin.conf
+    mkdir -p {{.Home}}/.kube
+    cp -f $KUBECONFIG {{.Home}}/.kube/config
+    chown -R {{.User}} {{.Home}}/.kube
+
+probes:
+- description: "kubeadm to be installed"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 30s bash -c "until command -v kubeadm >/dev/null 2>&1; do sleep 3; done"; then
+      echo >&2 "kubeadm is not installed yet"
+      exit 1
+    fi
+  hint: |
+    See "/var/log/cloud-init-output.log" in the guest
+- description: "kubeadm to be completed"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 300s bash -c "until test -f /etc/kubernetes/admin.conf; do sleep 3; done"; then
+      echo >&2 "k8s is not running yet"
+      exit 1
+    fi
+  hint: |
+    The k8s kubeconfig file has not yet been created.
+- description: "kubernetes cluster to be running"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 300s bash -c "until kubectl version >/dev/null 2>&1; do sleep 3; done"; then
+      echo >&2 "kubernetes cluster is not up and running yet"
+      exit 1
+    fi
+- description: "coredns deployment to be running"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    kubectl wait -n kube-system --timeout=180s --for=condition=available deploy coredns
+
+copyToHost:
+- guest: "/etc/kubernetes/admin.conf"
+  host: "{{.Dir}}/copied-from-guest/kubeconfig.yaml"
+  deleteOnStop: true
+
+message: |
+  To run `kubectl` on the host (assumes kubectl is installed), run the following commands:
+  ------
+  export KUBECONFIG="{{.Dir}}/copied-from-guest/kubeconfig.yaml"
+  kubectl ...
+  ------
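
After both clusters are up, the route preference added by the first patch can
be checked from inside a guest: the default route with the lowest metric
should be on the shared interface. The interface names and addresses below
are copied from the example output in the first patch and will differ on
other setups:

$ limactl shell c1 ip route show default
default via 192.168.105.1 dev lima0 proto dhcp src 192.168.105.7 metric 100
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200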