Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions drain_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,26 @@ def drain_node(cluster, node, action_plan_file_path, force_delete, dry_run):
# Check if the selected row is empty
if selected_row.empty:
logging.error(f"No node with name '{node}' found in the CSV file.")
return False
else:
remaining_cpu_request_percentage = selected_row['remaining_cpu_request_percentage'].values[0]
if remaining_cpu_request_percentage > 20 or force_delete == 'True':
if not can_pods_be_rescheduled(cluster, node, action_plan_file_path):
logging.info(f"Pods in the node: {node} identified to be deleted cannot be accomodated in any other node. Exiting.")
return False
else:
logging.info(f"Identified {node} to be drained")
if dry_run == 'True':
logging.info(f"DRY RUN: To drain {node} run: python drain_nodes.py -c {cluster} -n {node} -f {action_plan_file_path}")
return False
else:
logging.info(f"Draining Node-Name: {node}")
drain_and_delete_node(api_client, node, cluster)
logging.info(f"Node {node} drained successfully.")
return True
else:
logging.info(f"Node-Name: {node} cannot be drained as remaining_cpu_request_percentage is {remaining_cpu_request_percentage}.")
return False

def drain_and_delete_node(api_client, node_name, cluster, grace_period = -1, ignore_daemonsets = False):
print_number_of_nodes(cluster)
Expand Down
2 changes: 2 additions & 0 deletions get_details_for_kuber_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def create_k8s_view(cluster, label_name, label_value, timestamp):

# Get all nodes
pods = core_api.list_pod_for_all_namespaces(watch=False)
if pods is None:
return None
nodes = core_api.list_node(pretty=True)

csv_headers = ['view', 'node_name', 'allocated_cpu', 'allocated_memory', 'remaining_cpu_request', 'remaining_memory_request', 'remaining_cpu_limit', 'remaining_memory_limit', 'pod_name', 'container_number', 'limit_cpu', 'request_cpu', 'limit_memory', 'request_memory']
Expand Down
12 changes: 11 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import time
from get_details_for_kuber_cluster import create_k8s_view
from create_action_plan import create_action_plan
from drain_nodes import drain_node
import argparse
import logging
from datetime import datetime
import subprocess
import sys

def parse_arguments():
"""
Expand Down Expand Up @@ -41,10 +44,17 @@ def parse_arguments():
# Call function to create Kubernetes view
logging.info(f"Creating Kubernetes view for {cluster}")
file_name = create_k8s_view(cluster, label_name, label_value, timestamp)
if file_name is None:
logging.info("No node found to drain. Exiting...")
sys.exit(0)
logging.info(f"Kubernetes view created successfully and written to {file_name}")
logging.info(f"Creating action plan for {cluster}")
node_name, action_plan_path = create_action_plan(file_name, timestamp)
status = False
if node_name is None:
logging.info("No node found to drain. Exiting...")
else:
drain_node(cluster, node_name, action_plan_path, 'False', dry_run)
status = drain_node(cluster, node_name, action_plan_path, 'False', dry_run)
if status and dry_run == 'False':
time.sleep(60) # Sleep for 60 seconds
subprocess.run(["python3", "run.py", "-c", cluster, "-l", label_name, "-v", label_value, "--log", log_level, "--dry_run", dry_run])
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@roya2yush Better to use a while loop with a max-retries counter, so that this doesn't run forever — forking a new process every 60 seconds would eventually consume all available RAM or exhaust the process-ID limit (32,768 PIDs in about 23 days).