From d0fe4e6d71151f852c5ff0539be8ed9c5bc9e8cd Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:08:40 +1000 Subject: [PATCH 1/6] Fixed reference to an inexistent attribute in error message formatting. And shortened a few other message lines. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index cff17a35..4c6573af 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -280,6 +280,7 @@ def __init__(self, module): @CdpModule._Decorators.process_debug def process(self): + original_env_crn = self.env_crn if self.env_crn is not None: self.env_crn = self.cdpy.environments.resolve_environment_crn(self.env_crn) if self.env_crn is not None or self.df_crn is not None: @@ -294,8 +295,9 @@ def process(self): self._disable_df() elif self.state in ['present']: self.module.warn( - "Dataflow Service already enabled and configuration validation and reconciliation is not supported;" + - "to change a Dataflow Service, explicitly disable and recreate the Service or use the UI") + "Dataflow Service already enabled and configuration validation and reconciliation is not " + "supported; to change a Dataflow Service, explicitly disable and recreate the Service or " + "use the UI") if self.wait: self.service = self._wait_for_enabled() else: @@ -304,11 +306,10 @@ def process(self): else: # Environment does not have DF database entry, and probably doesn't exist if self.state in ['absent']: - self.module.log( - "Dataflow Service already disabled in CDP Environment %s" % self.env_crn) + self.module.log("Dataflow Service already disabled in CDP Environment %s" % self.env_crn) elif self.state in ['present']: if self.env_crn is None: - self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % self.env) + self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % 
original_env_crn) else: # create DF Service if not self.module.check_mode: From a7c5f4bfdeb85bea95c9dd3856851e9a4d5d23c7 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:14:23 +1000 Subject: [PATCH 2/6] The loadbalancer_subnets parameter was being ignored. Due to being extracted with the wrong name (lb_subnets) Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index 4c6573af..a33da4fe 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -257,7 +257,7 @@ def __init__(self, module): self.lb_ip_ranges = self._get_param('loadbalancer_ip_ranges') self.kube_ip_ranges = self._get_param('kube_ip_ranges') self.cluster_subnets = self._get_param('cluster_subnets') - self.lb_subnets = self._get_param('lb_subnets') + self.lb_subnets = self._get_param('loadbalancer_subnets') self.persist = self._get_param('persist') self.terminate = self._get_param('terminate') self.force = self._get_param('force') From 4dfad2fa99473e0ccf4a2c1e71ba188e3acc6e66 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:20:02 +1000 Subject: [PATCH 3/6] Fixed inaccuracies and missing parts in the module doc. 
Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 43 ++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index a33da4fe..d7664ca1 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -35,17 +35,17 @@ options: env_crn: description: - - The CRN of the CDP Environment to host the Dataflow Service - - Required when state is present + - The CRN of the CDP Environment to host the Dataflow Service + - The environment name can also be provided, instead of the CRN + - Required when state is present type: str - required: False + required: Conditional aliases: - name - - crn df_crn: description: - - The CRN of the DataFlow Service, if available - - Required when state is absent + - The CRN of the DataFlow Service, if available + - Required when state is absent type: str required: Conditional state: @@ -78,12 +78,24 @@ required: False aliases: - use_public_load_balancer - ip_ranges: + loadbalancer_ip_ranges: + description: The IP ranges authorized to connect to the load balancer + type: list + required: False + kube_ip_ranges: description: The IP ranges authorized to connect to the Kubernetes API server type: list required: False - aliases: - - authorized_ip_ranges + cluster_subnets: + description: + - Subnet ids that will be assigned to the Kubernetes cluster + type: list + required: False + loadbalancer_subnets: + description: + - Subnet ids that will be assigned to the load balancer + type: list + required: False persist: description: Whether or not to retain the database records of related entities during removal. type: bool @@ -94,6 +106,12 @@ type: bool required: False default: False + force: + description: Flag to indicate if the DataFlow deletion should be forced. 
+ type: bool + required: False + aliases: + - force_delete tags: description: Tags to apply to the DataFlow Service type: dict @@ -139,7 +157,7 @@ nodes_min: 3 nodes_max: 10 public_loadbalancer: True - ip_ranges: ['192.168.0.1/24'] + kube_ip_ranges: ['192.168.0.1/24'] state: present wait: yes @@ -386,7 +404,7 @@ def _disable_df(self): def main(): module = AnsibleModule( argument_spec=CdpModule.argument_spec( - env_crn=dict(type='str'), + env_crn=dict(type='str', aliases=['name']), df_crn=dict(type='str'), nodes_min=dict(type='int', default=3, aliases=['min_k8s_node_count']), nodes_max=dict(type='int', default=3, aliases=['max_k8s_node_count']), @@ -398,8 +416,7 @@ def main(): persist=dict(type='bool', default=False), terminate=dict(type='bool', default=False), tags=dict(required=False, type='dict', default=None), - state=dict(type='str', choices=['present', 'absent'], - default='present'), + state=dict(type='str', choices=['present', 'absent'], default='present'), force=dict(type='bool', default=False, aliases=['force_delete']), wait=dict(type='bool', default=True), delay=dict(type='int', aliases=['polling_delay'], default=15), From 7b1c9e7a68f98c0db4303a97d8309a85e833c543 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:25:21 +1000 Subject: [PATCH 4/6] Added cluster_subnets_filter and loadbalancer_subnets_filter options to module. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 85 +++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 9 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index d7664ca1..7f3e1dce 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -14,7 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import json +import jmespath from ansible.module_utils.basic import AnsibleModule from ansible_collections.cloudera.cloud.plugins.module_utils.cdp_common import CdpModule @@ -37,7 +38,7 @@ description: - The CRN of the CDP Environment to host the Dataflow Service - The environment name can also be provided, instead of the CRN - - Required when state is present + - Required when I(state=present) type: str required: Conditional aliases: @@ -45,7 +46,7 @@ df_crn: description: - The CRN of the DataFlow Service, if available - - Required when state is absent + - Required when I(state=absent) type: str required: Conditional state: @@ -82,20 +83,40 @@ description: The IP ranges authorized to connect to the load balancer type: list required: False - kube_ip_ranges: + k8s_ip_ranges: description: The IP ranges authorized to connect to the Kubernetes API server type: list required: False cluster_subnets: description: - Subnet ids that will be assigned to the Kubernetes cluster + - Mutually exclusive with the cluster_subnets_filter option type: list required: False + cluster_subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the Kubernetes cluster + - The expression will be applied to the full list of subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the cluster_subnets option + type: str + required: False loadbalancer_subnets: description: - Subnet ids that will be assigned to the load balancer + - Mutually exclusive with the loadbalancer_subnets_filter option type: list required: False + loadbalancer_subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the load balancer + - The expression will be applied to the full list of 
subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the cluster_subnets option + type: str + required: False persist: description: Whether or not to retain the database records of related entities during removal. type: bool @@ -157,7 +178,9 @@ nodes_min: 3 nodes_max: 10 public_loadbalancer: True - kube_ip_ranges: ['192.168.0.1/24'] + cluster_subnets_filter: "[?contains(subnetName, 'pvt')]" + loadbalancer_subnets_filter: "[?contains(subnetName, 'pub')]" + k8s_ip_ranges: ['192.168.0.1/24'] state: present wait: yes @@ -273,9 +296,11 @@ def __init__(self, module): self.nodes_max = self._get_param('nodes_max') self.public_loadbalancer = self._get_param('public_loadbalancer') self.lb_ip_ranges = self._get_param('loadbalancer_ip_ranges') - self.kube_ip_ranges = self._get_param('kube_ip_ranges') + self.k8s_ip_ranges = self._get_param('k8s_ip_ranges') self.cluster_subnets = self._get_param('cluster_subnets') + self.cluster_subnets_filter = self._get_param('cluster_subnets_filter') self.lb_subnets = self._get_param('loadbalancer_subnets') + self.lb_subnets_filter = self._get_param('loadbalancer_subnets_filter') self.persist = self._get_param('persist') self.terminate = self._get_param('terminate') self.force = self._get_param('force') @@ -330,6 +355,21 @@ def process(self): self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % original_env_crn) else: # create DF Service + if self.cluster_subnets_filter or self.lb_subnets_filter: + try: + env_info = self.cdpy.environments.describe_environment(self.env_crn) + subnet_metadata = list(env_info['network']['subnetMetadata'].values()) + except Exception: + subnet_metadata = [] + if not subnet_metadata: + self.module.fail_json( + msg="Could not retrieve subnet metadata for CDP Environment %s" 
% self.env_crn) + + if self.cluster_subnets_filter: + self.cluster_subnets = self._filter_subnets(self.cluster_subnets_filter, subnet_metadata) + if self.lb_subnets_filter: + self.lb_subnets = self._filter_subnets(self.lb_subnets_filter, subnet_metadata) + if not self.module.check_mode: self.service = self.cdpy.df.enable_service( env_crn=self.env_crn, @@ -337,7 +377,7 @@ def process(self): max_nodes=self.nodes_max, enable_public_ip=self.public_loadbalancer, lb_ips=self.lb_ip_ranges, - kube_ips=self.kube_ip_ranges, + kube_ips=self.k8s_ip_ranges, # tags=self.tags, # Currently overstrict blocking of values cluster_subnets=self.cluster_subnets, lb_subnets=self.lb_subnets @@ -356,6 +396,27 @@ def _wait_for_enabled(self): delay=self.delay, timeout=self.timeout ) + def _filter_subnets(self, query, subnets): + """Apply a JMESPath to an array of subnets and return the id of the selected subnets. + The query must only filter the array, without applying any projection. The query result must also be an + array of subnet objects. + + :param query: JMESpath query to filter the subnet array. + :param subnets: An array of subnet objects. Each subnet in the array is an object with the following attributes: + subnetId, subnetName, availabilityZone, cidr. + :return: An array of subnet ids. 
+ """ + filtered_subnets = [] + try: + filtered_subnets = jmespath.search(query, subnets) + except Exception: + self.module.fail_json(msg="The specified subnet filter is an invalid JMESPath expression: " % query) + try: + return [s['subnetId'] for s in filtered_subnets] + except Exception: + self.module.fail_json(msg='The subnet filter "%s" should return an array of subnet objects ' + 'but instead returned this: %s' % (query, json.dumps(filtered_subnets))) + def _disable_df(self): # Attempt clean Disable, which also ensures we have tried at least once before we do a forced removal if self.target['status']['state'] in self.cdpy.sdk.REMOVABLE_STATES: @@ -410,9 +471,11 @@ def main(): nodes_max=dict(type='int', default=3, aliases=['max_k8s_node_count']), public_loadbalancer=dict(type='bool', default=False, aliases=['use_public_load_balancer']), loadbalancer_ip_ranges=dict(type='list', elements='str', default=None), - kube_ip_ranges=dict(type='list', elements='str', default=None), + k8s_ip_ranges=dict(type='list', elements='str', default=None), cluster_subnets=dict(type='list', elements='str', default=None), + cluster_subnets_filter=dict(type='str', default=None), loadbalancer_subnets=dict(type='list', elements='str', default=None), + loadbalancer_subnets_filter=dict(type='str', default=None), persist=dict(type='bool', default=False), terminate=dict(type='bool', default=False), tags=dict(required=False, type='dict', default=None), @@ -426,7 +489,11 @@ def main(): required_if=[ ('state', 'present', ('env_crn', ), False), ('state', 'absent', ('df_crn', ), False) - ] + ], + mutually_exclusive=[ + ('cluster_subnets', 'cluster_subnets_filter'), + ('loadbalancer_subnets', 'loadbalancer_subnets_filter'), + ], ) result = DFService(module) From 03f3bdbe8cc30b2af0f77a7a9f806aeca78c7a5a Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Mon, 7 Nov 2022 12:42:48 +1100 Subject: [PATCH 5/6] Added a subnet filter parameter to DataHub Cluster. 
Signed-off-by: Andre Araujo --- plugins/modules/datahub_cluster.py | 100 +++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 10 deletions(-) diff --git a/plugins/modules/datahub_cluster.py b/plugins/modules/datahub_cluster.py index 3ef27bc2..0ed86cfc 100644 --- a/plugins/modules/datahub_cluster.py +++ b/plugins/modules/datahub_cluster.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json +import jmespath from ansible.module_utils.basic import AnsibleModule from ansible_collections.cloudera.cloud.plugins.module_utils.cdp_common import CdpModule @@ -75,13 +76,30 @@ type: str required: False subnet: - description: The subnet ID in AWS, or the Subnet Name on Azure or GCP + description: + - The subnet ID in AWS, or the Subnet Name on Azure or GCP + - Mutually exclusive with the subnets and subnets_filter options + type: str + required: False samples: - Azure: fe-az-f0-sbnt-2 - AWS: subnet-0bb1c79de3EXAMPLE - GCP: fe-gc-j8-sbnt-0 + subnets: + description: + - List of subnet IDs in case of multi availability zone setup. + - Mutually exclusive with the subnet and subnets_filter options + type: list + required: False + subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the cluster + - The expression will be applied to the full list of subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the subnet and subnets options + type: str + required: False image: description: ID of the image used for cluster instances type: str @@ -152,6 +170,11 @@ required: False aliases: - datahub_tags + extension: + description: + - Cluster extensions for Data Hub cluster. 
+ type: dict + required: False force: description: - Flag indicating if the datahub should be force deleted. @@ -394,11 +417,15 @@ def __init__(self, module): self.environment = self._get_param('environment') self.definition = self._get_param('definition') self.subnet = self._get_param('subnet') + self.subnets = self._get_param('subnets') + self.subnets_filter = self._get_param('subnets_filter') self.image_id = self._get_param('image') self.image_catalog = self._get_param('catalog') self.template = self._get_param('template') self.groups = self._get_param('groups') self.tags = self._get_param('tags') + self.extension = self._get_param('extension') + self.multi_az = self._get_param('multi_az') self.wait = self._get_param('wait') self.delay = self._get_param('delay') @@ -542,16 +569,40 @@ def _configure_payload(self): ) if self.definition is not None: - payload["clusterDefinitionName"]=self.definition + payload["clusterDefinitionName"] = self.definition else: - payload["image"]={"id": self.image_id, "catalogName": self.image_catalog} - payload["clusterTemplateName"]=self.template - payload["instanceGroups"]=self.groups + payload["image"] = {"id": self.image_id, "catalogName": self.image_catalog} + payload["clusterTemplateName"] = self.template + payload["instanceGroups"] = self.groups + + if self.subnets_filter: + try: + env_info = self.cdpy.environments.describe_environment(self.environment) + subnet_metadata = list(env_info['network']['subnetMetadata'].values()) + except Exception: + subnet_metadata = [] + if not subnet_metadata: + self.module.fail_json( + msg="Could not retrieve subnet metadata for CDP Environment %s" % self.environment) + + subnets = self._filter_subnets(self.subnets_filter, subnet_metadata) + if len(subnets) == 1: + self.subnet = subnets[0] + else: + self.subnets = subnets - if self.host_env['cloudPlatform'] == 'GCP': - payload['subnetName'] = self.subnet - else: - payload['subnetId'] = self.subnet + if self.subnet: + if 
self.host_env['cloudPlatform'] == 'GCP': + payload['subnetName'] = self.subnet + else: + payload['subnetId'] = self.subnet + elif self.subnets: + payload['subnetIds'] = self.subnets + + if self.extension is not None: + payload['clusterExtension'] = self.extension + + payload['multiAz'] = self.multi_az if self.tags is not None: payload['tags'] = list() @@ -560,6 +611,27 @@ def _configure_payload(self): return payload + def _filter_subnets(self, query, subnets): + """Apply a JMESPath to an array of subnets and return the id of the selected subnets. + The query must only filter the array, without applying any projection. The query result must also be an + array of subnet objects. + + :param query: JMESpath query to filter the subnet array. + :param subnets: An array of subnet objects. Each subnet in the array is an object with the following attributes: + subnetId, subnetName, availabilityZone, cidr. + :return: An array of subnet ids. + """ + filtered_subnets = [] + try: + filtered_subnets = jmespath.search(query, subnets) + except Exception: + self.module.fail_json(msg="The specified subnet filter is an invalid JMESPath expression: %s" % query) + try: + return [s['subnetId'] for s in filtered_subnets] + except Exception: + self.module.fail_json(msg='The subnet filter "%s" should return an array of subnet objects ' + 'but instead returned this: %s' % (query, json.dumps(filtered_subnets))) + def _reconcile_existing_state(self, existing): mismatched = list() @@ -594,19 +666,26 @@ def main(): state=dict(required=False, type='str', choices=['present', 'started', 'stopped', 'absent'], default='present'), definition=dict(required=False, type='str'), subnet=dict(required=False, type='str', default=None), + subnets=dict(required=False, type='list', elements='str', default=None), + subnets_filter=dict(required=False, type='str', default=None), image=dict(required=False, type='str', default=None), catalog=dict(required=False, type='str', default=None), 
template=dict(required=False, type='str', default=None), groups=dict(required=False, type='list', default=None), environment=dict(required=False, type='str', aliases=['env'], default=None), tags=dict(required=False, type='dict', aliases=['datahub_tags']), + extension=dict(required=False, type='dict'), + multi_az=dict(required=False, type='bool', default=True), force=dict(required=False, type='bool', default=False), wait=dict(required=False, type='bool', default=True), delay=dict(required=False, type='int', aliases=['polling_delay'], default=15), timeout=dict(required=False, type='int', aliases=['polling_timeout'], default=3600) ), - supports_check_mode=True + supports_check_mode=True, + mutually_exclusive=[ + ('subnet', 'subnets', 'subnets_filter'), + ], #Punting on additional checks here. There are a variety of supporting datahub invocations that can make this more complex #required_together=[ # ['subnet', 'image', 'catalog', 'template', 'groups', 'environment'], From 2af8d37c393e29e7d1aac494840061bfb68aed6f Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Wed, 4 Jan 2023 11:31:50 +1100 Subject: [PATCH 6/6] Removed non-boolean required references in doc. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index 7f3e1dce..72b5afa2 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -40,7 +40,6 @@ - The environment name can also be provided, instead of the CRN - Required when I(state=present) type: str - required: Conditional aliases: - name df_crn: @@ -48,7 +47,6 @@ - The CRN of the DataFlow Service, if available - Required when I(state=absent) type: str - required: Conditional state: description: - The declarative state of the Dataflow Service