Skip to content
Closed
79 changes: 76 additions & 3 deletions client/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,82 @@ func (n *NodeClient) NetworkListInterfaces(ctx context.Context) (result map[stri
return
}

// AdminRebootNode sends the `zos.admin.reboot` command to the node twin,
// asking the node to stop all running services and reboot.
// The command carries no payload and no result is decoded.
func (n *NodeClient) AdminRebootNode(ctx context.Context) error {
	return n.bus.Call(ctx, n.nodeTwin, "zos.admin.reboot", nil, nil)
}

// AdminRestartService sends the `zos.admin.restart` command to the node twin
// to restart a single zinit service. The service name is the request payload;
// no result is decoded.
func (n *NodeClient) AdminRestartService(ctx context.Context, service string) error {
	return n.bus.Call(ctx, n.nodeTwin, "zos.admin.restart", service, nil)
}

// AdminRestartAll sends the `zos.admin.restart_all` command to the node twin
// to restart all zinit services. The command carries no payload and no result
// is decoded.
func (n *NodeClient) AdminRestartAll(ctx context.Context) error {
	return n.bus.Call(ctx, n.nodeTwin, "zos.admin.restart_all", nil, nil)
}

// AdminShowLogs sends the `zos.admin.show_logs` command to the node twin and
// returns the zinit logs it replies with. `l` is the number of log lines
// requested and is sent as the request payload.
func (n *NodeClient) AdminShowLogs(ctx context.Context, l int) (logs []byte, err error) {
	if err = n.bus.Call(ctx, n.nodeTwin, "zos.admin.show_logs", l, &logs); err != nil {
		return logs, err
	}

	return logs, nil
}

// AdminShowResolve sends the `zos.admin.show_resolve` command to the node twin
// and returns the content of the node's /etc/resolv.conf as raw bytes.
func (n *NodeClient) AdminShowResolve(ctx context.Context) (res []byte, err error) {
	if err = n.bus.Call(ctx, n.nodeTwin, "zos.admin.show_resolve", nil, &res); err != nil {
		return res, err
	}

	return res, nil
}

// AdminShowOpenConnections sends the `zos.admin.show_open_connections` command
// to the node twin and returns, as raw bytes, the information the node reports
// about all of its open connections.
func (n *NodeClient) AdminShowOpenConnections(ctx context.Context) (res []byte, err error) {
	if err = n.bus.Call(ctx, n.nodeTwin, "zos.admin.show_open_connections", nil, &res); err != nil {
		return res, err
	}

	return res, nil
}

// AdminStopWorkload sends the `zos.admin.stop_workload` command to the node
// twin to stop a workload. The payload identifies the workload by the twin ID
// and the workload ID, encoded as `twin_id` and `workload_id`.
func (n *NodeClient) AdminStopWorkload(ctx context.Context, twinID uint32, wlID uint64) error {
	var payload struct {
		TwinID     uint32 `json:"twin_id"`
		WorkloadID uint64 `json:"workload_id"`
	}
	payload.TwinID = twinID
	payload.WorkloadID = wlID

	return n.bus.Call(ctx, n.nodeTwin, "zos.admin.stop_workload", payload, nil)
}

// AdminResumeWorkload sends the `zos.admin.resume_workload` command to the
// node twin to resume a previously stopped workload. The payload identifies
// the workload by the twin ID and the workload ID, encoded as `twin_id` and
// `workload_id`.
func (n *NodeClient) AdminResumeWorkload(ctx context.Context, twinID uint32, wlID uint64) error {
	const cmd = "zos.admin.resume_workload"
	args := struct {
		TwinID     uint32 `json:"twin_id"`
		WorkloadID uint64 `json:"workload_id"`
	}{
		TwinID:     twinID,
		WorkloadID: wlID,
	}

	return n.bus.Call(ctx, n.nodeTwin, cmd, args, nil)
}

// NetworkListAllInterfaces return all physical devices on a node
func (n *NodeClient) NetworkListAllInterfaces(ctx context.Context) (result map[string]Interface, err error) {
const cmd = "zos.network.admin.interfaces"
const cmd = "zos.admin.interfaces"

err = n.bus.Call(ctx, n.nodeTwin, cmd, nil, &result)

Expand All @@ -271,14 +344,14 @@ func (n *NodeClient) NetworkListAllInterfaces(ctx context.Context) (result map[s
// NetworkSetPublicExitDevice selects which physical interface to use as an exit device.
// Setting `iface` to `zos` will then make the node run in a single nic setup.
func (n *NodeClient) NetworkSetPublicExitDevice(ctx context.Context, iface string) error {
const cmd = "zos.network.admin.set_public_nic"
const cmd = "zos.admin.set_public_nic"

return n.bus.Call(ctx, n.nodeTwin, cmd, iface, nil)
}

// NetworkGetPublicExitDevice gets the current dual nic setup of the node.
func (n *NodeClient) NetworkGetPublicExitDevice(ctx context.Context) (exit ExitDevice, err error) {
const cmd = "zos.network.admin.get_public_nic"
const cmd = "zos.admin.get_public_nic"

err = n.bus.Call(ctx, n.nodeTwin, cmd, nil, &exit)
return
Expand Down
10 changes: 6 additions & 4 deletions cmds/modules/api_gateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ func action(cli *cli.Context) error {
return fmt.Errorf("failed to create substrate manager: %w", err)
}

router := peer.NewRouter()
gw, err := substrategw.NewSubstrateGateway(manager, id)
if err != nil {
return fmt.Errorf("failed to create api gateway: %w", err)
Expand Down Expand Up @@ -96,6 +95,8 @@ func action(cli *cli.Context) error {
if err != nil {
return fmt.Errorf("failed to create zos api: %w", err)
}

router := peer.NewRouter()
api.SetupRoutes(router)

pair, err := id.KeyPair()
Expand All @@ -105,7 +106,7 @@ func action(cli *cli.Context) error {

bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = 0
backoff.Retry(func() error {
if err = backoff.Retry(func() error {
_, err = peer.NewPeer(
ctx,
hex.EncodeToString(pair.Seed()),
Expand All @@ -117,9 +118,10 @@ func action(cli *cli.Context) error {
if err != nil {
return fmt.Errorf("failed to start a new rmb peer: %w", err)
}

return nil
}, bo)
}, bo); err != nil {
return err
}

log.Info().
Str("broker", msgBrokerCon).
Expand Down
4 changes: 2 additions & 2 deletions cmds/modules/provisiond/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *ContractEventHandler) sync(ctx context.Context) error {
action = r.engine.Pause
}

if err := action(ctx, dl.TwinID, dl.ContractID); err != nil {
if err := action(dl.TwinID, dl.ContractID); err != nil {
log.Error().Err(err).Msg("failed to change contract state")
}
}
Expand Down Expand Up @@ -176,7 +176,7 @@ func (r *ContractEventHandler) Run(ctx context.Context) error {
action = r.engine.Pause
}

if err := action(ctx, event.TwinId, event.Contract); err != nil {
if err := action(event.TwinId, event.Contract); err != nil {
log.Error().Err(err).
Uint32("twin", event.TwinId).
Uint64("contract", event.Contract).
Expand Down
94 changes: 88 additions & 6 deletions docs/manual/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ so `used = user_used + system`, while `system` is only the amount of resourced r
| `zos.storage.pools` | - |`[]Pool`|

List all node pools with their types, size and used space
where

Where

```json
Pool {
Expand Down Expand Up @@ -151,13 +152,95 @@ it means it can act like an access node to user private networks

## Admin

The next set of commands are ONLY possible to be called by the `farmer` only.
The following commands can ONLY be called by the `farmer` that owns the node.

### Reboot Node

| command |body| return|
|---|---|---|
| `zos.admin.reboot` | - | - |

Stops all services then reboots the node

### Restart Service

| command |body| return|
|---|---|---|
| `zos.admin.restart` | string | - |

Restarts a service running on the node

### Restart All Services

| command |body| return|
|---|---|---|
| `zos.admin.restart_all` | - | - |

Restarts all zinit services running on the node

### Show Logs

| command |body| return|
|---|---|---|
| `zos.admin.show_logs` | int | []byte |

Shows a number of lines of zinit logs

### Show Resolve

| command |body| return|
|---|---|---|
| `zos.admin.show_resolve` | - | []byte |

Shows the content of /etc/resolv.conf

### Show Open Connections

| command |body| return|
|---|---|---|
| `zos.admin.show_open_connections` | - | []byte |

Shows information about all open connections in the node

### Stop Workload

| command |body| return|
|---|---|---|
| `zos.admin.stop_workload` | `Args` | - |

Where

```json
Args {
"twin_id": "uint32",
"workload_id": "uint64",
}
```

Stops a workload

### Resume Workload

| command |body| return|
|---|---|---|
| `zos.admin.resume_workload` | `Args` | - |

Where

```json
Args {
"twin_id": "uint32",
"workload_id": "uint64",
}
```

Resumes a stopped workload

### List Physical Interfaces

| command |body| return|
|---|---|---|
| `zos.network.admin.interfaces` | - |`map[string]Interface` |
| `zos.admin.interfaces` | - |`map[string]Interface` |

Where

Expand All @@ -175,7 +258,7 @@ Those interfaces then can be used as an input to `set_public_nic`

| command |body| return|
|---|---|---|
| `zos.network.admin.get_public_nic` | - |`ExitDevice` |
| `zos.admin.get_public_nic` | - |`ExitDevice` |

Where

Expand All @@ -193,7 +276,7 @@ returns the interface used by public traffic (for user workloads)

| command |body| return|
|---|---|---|
| `zos.network.admin.set_public_nic` | `name` |- |
| `zos.admin.set_public_nic` | `name` |- |

name must be one of the (free) names returned by `zos.admin.interfaces`

Expand Down Expand Up @@ -223,7 +306,6 @@ name must be one of (free) names returned by `zos.network.admin.interfaces`
|---|---|---|
| `zos.system.node_features_get` | - |`[]NodeFeature` |


Where

```json
Expand Down
40 changes: 28 additions & 12 deletions pkg/primitives/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"context"
"encoding/json"
"fmt"
"os/exec"
"strconv"
"strings"
"time"

"github.com/pkg/errors"
Expand Down Expand Up @@ -31,9 +34,7 @@ func GetCapacity(ctx context.Context) gridtypes.Capacity {
return val.(gridtypes.Capacity)
}

var (
_ provision.Provisioner = (*Statistics)(nil)
)
var _ provision.Provisioner = (*Statistics)(nil)

type Reserved func() (gridtypes.Capacity, error)

Expand Down Expand Up @@ -146,7 +147,6 @@ func (s *Statistics) hasEnoughCapacity(wl *gridtypes.WorkloadWithID) (gridtypes.
id, _ := gridtypes.NewWorkloadID(dl_.TwinID, dl_.ContractID, wl_.Name)
return id == wl.ID
})

if err != nil {
return used, errors.Wrap(err, "failed to get available memory")
}
Expand All @@ -155,7 +155,7 @@ func (s *Statistics) hasEnoughCapacity(wl *gridtypes.WorkloadWithID) (gridtypes.
return used, fmt.Errorf("cannot fulfil required memory size %d bytes out of usable %d bytes", required.MRU, usable)
}

//check other resources as well?
// check other resources as well?
return used, nil
}

Expand Down Expand Up @@ -235,6 +235,19 @@ func (s *statsStream) Total() gridtypes.Capacity {
return s.stats.Total()
}

// OpenConnections runs `ss -ptn state established` and returns the command's
// raw standard output, together with any error from running it.
func (s *statsStream) OpenConnections() ([]byte, error) {
	ss := exec.Command("ss", "-ptn", "state", "established")
	return ss.Output()
}

// openConnectionsCount reports the number of lines printed by
// `ss -ptn state established`, counted by piping the output through `wc -l`
// in a shell.
//
// NOTE(review): the figure is whatever `wc -l` prints, so it presumably also
// counts any header line emitted by ss — confirm before treating it as an
// exact connection count.
func (s *statsStream) openConnectionsCount() (int, error) {
	const pipeline = "ss -ptn state established | wc -l"

	raw, err := exec.Command("/bin/sh", "-c", pipeline).Output()
	if err != nil {
		return 0, err
	}

	lineCount := strings.TrimSpace(string(raw))
	return strconv.Atoi(lineCount)
}

func (s *statsStream) Workloads() (int, error) {
capacity, err := s.stats.storage.Capacity()
if err != nil {
Expand All @@ -253,10 +266,17 @@ func (s *statsStream) GetCounters() (pkg.Counters, error) {
if err != nil {
return pkg.Counters{}, err
}

connCount, err := s.openConnectionsCount()
if err != nil {
return pkg.Counters{}, err
}

return pkg.Counters{
Total: s.stats.Total(),
Used: activeCounters.cap,
System: reserved,
Total: s.stats.Total(),
Used: activeCounters.cap,
System: reserved,
OpenConnecions: connCount,
Users: pkg.UsersCounters{
Deployments: activeCounters.deployments,
Workloads: activeCounters.workloads,
Expand Down Expand Up @@ -298,10 +318,6 @@ func (s *statsStream) ListGPUs() ([]pkg.GPUInfo, error) {
return nil, errors.Wrap(err, "failed to list available devices")
}

if err != nil {
return nil, errors.Wrap(err, "failed to list active deployments")
}

used, err := usedGpus()
if err != nil {
return nil, errors.Wrap(err, "failed to list used gpus")
Expand Down
Loading
Loading