From 8cfe8579d4e02de1b6354237e6a7cae756e61cff Mon Sep 17 00:00:00 2001
From: Nir Soffer
Date: Sat, 14 Sep 2024 05:35:57 +0300
Subject: [PATCH 1/2] Prefer additional network over user network

If using an additional network (e.g. lima:shared), override the DHCP
route metric so that the additional network is preferred. This fixes
issues with k8s components (like submariner) that use the first default
route and break because the user network has no connectivity between
VMs. With this change they always use the additional network.

Example routes with this change when using socket_vmnet:

$ ip route
default via 192.168.105.1 dev lima0 proto dhcp src 192.168.105.7 metric 100
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200
192.168.5.0/24 dev eth0 proto kernel scope link src 192.168.5.15 metric 200
192.168.5.2 dev eth0 proto dhcp scope link src 192.168.5.15 metric 200
192.168.105.0/24 dev lima0 proto kernel scope link src 192.168.105.7 metric 100
192.168.105.1 dev lima0 proto dhcp scope link src 192.168.105.7 metric 100

Example routes without an additional network:

$ ip route
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200
192.168.5.0/24 dev eth0 proto kernel scope link src 192.168.5.15 metric 200
192.168.5.2 dev eth0 proto dhcp scope link src 192.168.5.15 metric 200

Another way to solve this issue is to fix up the metric in a
provisioning script, as done in RamenDR:
https://github.com/RamenDR/ramen/blob/c02119785e734e15511236edd935c04ff71b6646/test/drenv/providers/lima/k8s.yaml#L37

But I think it is better to fix this in Lima, since the current network
configuration is very problematic.

Signed-off-by: Nir Soffer
---
 pkg/cidata/cidata.TEMPLATE.d/network-config | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/cidata/cidata.TEMPLATE.d/network-config b/pkg/cidata/cidata.TEMPLATE.d/network-config
index b8a43683e51..ff291030e0b 100644
--- a/pkg/cidata/cidata.TEMPLATE.d/network-config
+++ b/pkg/cidata/cidata.TEMPLATE.d/network-config
@@ -6,6 +6,12 @@ ethernets:
     macaddress: '{{$nw.MACAddress}}'
     dhcp4: true
     set-name: {{$nw.Interface}}
+    dhcp4-overrides:
+      {{- if (eq $nw.Interface $.SlirpNICName) }}
+      route-metric: 200
+      {{- else }}
+      route-metric: 100
+      {{- end }}
     {{- if and (eq $nw.Interface $.SlirpNICName) (gt (len $.DNSAddresses) 0) }}
     nameservers:
       addresses:

From a1ff7905ad63a9f2366e65745a5cb517db4d472e Mon Sep 17 00:00:00 2001
From: Nir Soffer
Date: Sat, 14 Sep 2024 06:43:20 +0300
Subject: [PATCH 2/2] Add k8s-vmnet example

This example shows how to run multiple connected Kubernetes clusters
using a shared network. You can access all the clusters via the shared
network, and the clusters can access each other.

This configuration was derived from the k8s.yaml template, with the
following changes:

- Ensure that the API server and kubelet listen on the shared network.
  This is important for pods that need to listen on the host network.
  An example is a Rook Ceph test cluster.
- Disable port forwarding, since we access the clusters via the shared
  network, and automatic port forwarding cannot work for multiple
  clusters.
- Since we access the API server via the shared network, don't modify
  the kubeconfig copied to the host.
- Enable parallel image pulls for faster provisioning of complex
  clusters.
- Allow unprivileged pods to access block devices. Required for
  kubevirt virtual machines or replicating block volumes using volsync.
  (The settings behind the last two items are excerpted below.)
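For reference, the last two items correspond to these settings in the
template added by this patch (copied out of context from the full file
below):

  # containerd CRI plugin: pods get access to block devices based on the
  # pod security context instead of requiring privileged containers.
  device_ownership_from_security_context = true

  # kubelet argument set via the kubeadm InitConfiguration: pull images
  # in parallel instead of one at a time.
  serialize-image-pulls: "false"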
These changes were extracted from RamenDR k8s.yaml:
https://github.com/RamenDR/ramen/blob/main/test/drenv/providers/lima/k8s.yaml

Signed-off-by: Nir Soffer
---
 examples/k8s-vmnet.yaml | 205 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)
 create mode 100644 examples/k8s-vmnet.yaml

diff --git a/examples/k8s-vmnet.yaml b/examples/k8s-vmnet.yaml
new file mode 100644
index 00000000000..603baa5d43f
--- /dev/null
+++ b/examples/k8s-vmnet.yaml
@@ -0,0 +1,205 @@
+# Deploy multiple connected Kubernetes clusters via vmnet.
+#
+# Creating the clusters:
+#
+# $ limactl start --name c1 --tty=false template://k8s-vmnet &
+# $ limactl start --name c2 --tty=false template://k8s-vmnet &
+# $ wait
+#
+# Accessing the clusters with limactl:
+#
+# $ limactl shell c1 kubectl get node
+# NAME      STATUS   ROLES                  AGE   VERSION
+# lima-c1   Ready    control-plane,master   44s   v1.22.3
+#
+# Accessing a cluster by exporting the kubeconfig file:
+#
+# $ export KUBECONFIG=$(limactl list c1 --format '{{.Dir}}/copied-from-guest/kubeconfig.yaml')
+#
+# $ kubectl get no
+# NAME      STATUS   ROLES                  AGE   VERSION
+# lima-c1   Ready    control-plane,master   44s   v1.22.3
+
+# This template requires Lima v0.20.0 or later.
+images:
+- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
+  arch: "x86_64"
+- location: "https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-arm64.img"
+  arch: "aarch64"
+
+# Mounts are disabled in this template, but can be enabled optionally.
+mounts: []
+
+containerd:
+  system: true
+  user: false
+
+# Using externally managed socket_vmnet.
+networks:
+- socket: /var/run/socket_vmnet
+
+# Port forwarding is disabled since we access the clusters via the shared network.
+portForwards:
+- ignore: true
+  guestIP: "0.0.0.0"
+
+provision:
+# See
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    command -v kubeadm >/dev/null 2>&1 && exit 0
+    # Install and configure prerequisites
+    cat <<EOF | tee /etc/modules-load.d/k8s.conf
+    overlay
+    br_netfilter
+    EOF
+    modprobe overlay
+    modprobe br_netfilter
+    cat <<EOF | tee /etc/sysctl.d/k8s.conf
+    net.bridge.bridge-nf-call-iptables  = 1
+    net.ipv4.ip_forward                 = 1
+    net.bridge.bridge-nf-call-ip6tables = 1
+    EOF
+    sysctl --system
+
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    grep SystemdCgroup /etc/containerd/config.toml && exit 0
+    grep "version = 2" /etc/containerd/config.toml || exit 1
+    # Configuring the systemd cgroup driver
+    # Overriding the sandbox (pause) image
+    cat <<EOF >>/etc/containerd/config.toml
+    [plugins]
+      [plugins."io.containerd.grpc.v1.cri"]
+        sandbox_image = "$(kubeadm config images list | grep pause | sort -r | head -n1)"
+        device_ownership_from_security_context = true
+        [plugins."io.containerd.grpc.v1.cri".containerd]
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+            [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+              runtime_type = "io.containerd.runc.v2"
+              [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+                SystemdCgroup = true
+    EOF
+    systemctl restart containerd
+
+# See
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    test -e /etc/kubernetes/admin.conf && exit 0
+    export KUBECONFIG=/etc/kubernetes/admin.conf
+    kubeadm config images list
+    kubeadm config images pull --cri-socket=unix:///run/containerd/containerd.sock
+    # Use the shared network, required for components like submariner and pods
+    # that need to be accessible on the shared network.
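+    # The route metric override from the first patch makes the shared network
+    # (e.g. lima0) the lowest-metric default route, so it is listed first by
+    # `ip route show default` and its prefsrc is the VM address on the shared
+    # network.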
+    ADVERTISE_ADDRESS=$(ip -j -4 route show default | jq -r '.[0].prefsrc')
+    # Initializing your control-plane node
+    cat <<EOF >kubeadm-config.yaml
+    kind: InitConfiguration
+    apiVersion: kubeadm.k8s.io/v1beta3
+    nodeRegistration:
+      criSocket: unix:///run/containerd/containerd.sock
+      kubeletExtraArgs:
+        node-ip: $ADVERTISE_ADDRESS
+        serialize-image-pulls: "false"
+    localAPIEndpoint:
+      advertiseAddress: $ADVERTISE_ADDRESS
+    ---
+    kind: ClusterConfiguration
+    apiVersion: kubeadm.k8s.io/v1beta3
+    apiServer:
+      certSANs: # --apiserver-cert-extra-sans
+      - "127.0.0.1"
+    networking:
+      podSubnet: "10.244.0.0/16" # --pod-network-cidr
+    ---
+    kind: KubeletConfiguration
+    apiVersion: kubelet.config.k8s.io/v1beta1
+    cgroupDriver: systemd
+    EOF
+    kubeadm init --config kubeadm-config.yaml
+    # Installing a Pod network add-on
+    kubectl apply -f https://github.com/flannel-io/flannel/releases/download/v0.24.0/kube-flannel.yml
+    # Control plane node isolation
+    kubectl taint nodes --all node-role.kubernetes.io/control-plane-
+    mkdir -p ${HOME:-/root}/.kube && cp -f $KUBECONFIG ${HOME:-/root}/.kube/config
+
+- mode: system
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    export KUBECONFIG=/etc/kubernetes/admin.conf
+    mkdir -p {{.Home}}/.kube
+    cp -f $KUBECONFIG {{.Home}}/.kube/config
+    chown -R {{.User}} {{.Home}}/.kube
+
+probes:
+- description: "kubeadm to be installed"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 30s bash -c "until command -v kubeadm >/dev/null 2>&1; do sleep 3; done"; then
+      echo >&2 "kubeadm is not installed yet"
+      exit 1
+    fi
+  hint: |
+    See "/var/log/cloud-init-output.log" in the guest
+- description: "kubeadm to be completed"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 300s bash -c "until test -f /etc/kubernetes/admin.conf; do sleep 3; done"; then
+      echo >&2 "k8s is not running yet"
+      exit 1
+    fi
+  hint: |
+    The k8s kubeconfig file has not yet been created.
+- description: "kubernetes cluster to be running"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    if ! timeout 300s bash -c "until kubectl version >/dev/null 2>&1; do sleep 3; done"; then
+      echo >&2 "kubernetes cluster is not up and running yet"
+      exit 1
+    fi
+- description: "coredns deployment to be running"
+  script: |
+    #!/bin/bash
+    set -eux -o pipefail
+    kubectl wait -n kube-system --timeout=180s --for=condition=available deploy coredns
+
+copyToHost:
+- guest: "/etc/kubernetes/admin.conf"
+  host: "{{.Dir}}/copied-from-guest/kubeconfig.yaml"
+  deleteOnStop: true
+
+message: |
+  To run `kubectl` on the host (assumes kubectl is installed), run the following commands:
+  ------
+  export KUBECONFIG="{{.Dir}}/copied-from-guest/kubeconfig.yaml"
+  kubectl ...
+  ------
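
After both clusters are up, the route preference added by the first patch can
be checked from inside a guest: the default route with the lowest metric
should be on the shared interface. The interface names and addresses below
are copied from the example output in the first patch and will differ on
other setups:

$ limactl shell c1 ip route show default
default via 192.168.105.1 dev lima0 proto dhcp src 192.168.105.7 metric 100
default via 192.168.5.2 dev eth0 proto dhcp src 192.168.5.15 metric 200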