From d0fe4e6d71151f852c5ff0539be8ed9c5bc9e8cd Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:08:40 +1000 Subject: [PATCH 1/6] Fixed reference to an inexistent attribute in error message formatting. And shortened a few other message lines. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index cff17a35..4c6573af 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -280,6 +280,7 @@ def __init__(self, module): @CdpModule._Decorators.process_debug def process(self): + original_env_crn = self.env_crn if self.env_crn is not None: self.env_crn = self.cdpy.environments.resolve_environment_crn(self.env_crn) if self.env_crn is not None or self.df_crn is not None: @@ -294,8 +295,9 @@ def process(self): self._disable_df() elif self.state in ['present']: self.module.warn( - "Dataflow Service already enabled and configuration validation and reconciliation is not supported;" + - "to change a Dataflow Service, explicitly disable and recreate the Service or use the UI") + "Dataflow Service already enabled and configuration validation and reconciliation is not " + "supported; to change a Dataflow Service, explicitly disable and recreate the Service or " + "use the UI") if self.wait: self.service = self._wait_for_enabled() else: @@ -304,11 +306,10 @@ def process(self): else: # Environment does not have DF database entry, and probably doesn't exist if self.state in ['absent']: - self.module.log( - "Dataflow Service already disabled in CDP Environment %s" % self.env_crn) + self.module.log("Dataflow Service already disabled in CDP Environment %s" % self.env_crn) elif self.state in ['present']: if self.env_crn is None: - self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % self.env) + self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % 
original_env_crn) else: # create DF Service if not self.module.check_mode: From a7c5f4bfdeb85bea95c9dd3856851e9a4d5d23c7 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:14:23 +1000 Subject: [PATCH 2/6] The loadbalancer_subnets parameter was being ignored. Due to being extracted with the wrong name (lb_subnets) Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index 4c6573af..a33da4fe 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -257,7 +257,7 @@ def __init__(self, module): self.lb_ip_ranges = self._get_param('loadbalancer_ip_ranges') self.kube_ip_ranges = self._get_param('kube_ip_ranges') self.cluster_subnets = self._get_param('cluster_subnets') - self.lb_subnets = self._get_param('lb_subnets') + self.lb_subnets = self._get_param('loadbalancer_subnets') self.persist = self._get_param('persist') self.terminate = self._get_param('terminate') self.force = self._get_param('force') From 4dfad2fa99473e0ccf4a2c1e71ba188e3acc6e66 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:20:02 +1000 Subject: [PATCH 3/6] Fixed inaccuracies and missing parts in the module doc. 
Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 43 ++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index a33da4fe..d7664ca1 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -35,17 +35,17 @@ options: env_crn: description: - - The CRN of the CDP Environment to host the Dataflow Service - - Required when state is present + - The CRN of the CDP Environment to host the Dataflow Service + - The environment name can also be provided, instead of the CRN + - Required when state is present type: str - required: False + required: Conditional aliases: - name - - crn df_crn: description: - - The CRN of the DataFlow Service, if available - - Required when state is absent + - The CRN of the DataFlow Service, if available + - Required when state is absent type: str required: Conditional state: @@ -78,12 +78,24 @@ required: False aliases: - use_public_load_balancer - ip_ranges: + loadbalancer_ip_ranges: + description: The IP ranges authorized to connect to the load balancer + type: list + required: False + kube_ip_ranges: description: The IP ranges authorized to connect to the Kubernetes API server type: list required: False - aliases: - - authorized_ip_ranges + cluster_subnets: + description: + - Subnet ids that will be assigned to the Kubernetes cluster + type: list + required: False + loadbalancer_subnets: + description: + - Subnet ids that will be assigned to the load balancer + type: list + required: False persist: description: Whether or not to retain the database records of related entities during removal. type: bool @@ -94,6 +106,12 @@ type: bool required: False default: False + force: + description: Flag to indicate if the DataFlow deletion should be forced. 
+ type: bool + required: False + aliases: + - force_delete tags: description: Tags to apply to the DataFlow Service type: dict @@ -139,7 +157,7 @@ nodes_min: 3 nodes_max: 10 public_loadbalancer: True - ip_ranges: ['192.168.0.1/24'] + kube_ip_ranges: ['192.168.0.1/24'] state: present wait: yes @@ -386,7 +404,7 @@ def _disable_df(self): def main(): module = AnsibleModule( argument_spec=CdpModule.argument_spec( - env_crn=dict(type='str'), + env_crn=dict(type='str', aliases=['name']), df_crn=dict(type='str'), nodes_min=dict(type='int', default=3, aliases=['min_k8s_node_count']), nodes_max=dict(type='int', default=3, aliases=['max_k8s_node_count']), @@ -398,8 +416,7 @@ def main(): persist=dict(type='bool', default=False), terminate=dict(type='bool', default=False), tags=dict(required=False, type='dict', default=None), - state=dict(type='str', choices=['present', 'absent'], - default='present'), + state=dict(type='str', choices=['present', 'absent'], default='present'), force=dict(type='bool', default=False, aliases=['force_delete']), wait=dict(type='bool', default=True), delay=dict(type='int', aliases=['polling_delay'], default=15), From 7b1c9e7a68f98c0db4303a97d8309a85e833c543 Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Thu, 7 Jul 2022 12:25:21 +1000 Subject: [PATCH 4/6] Added cluster_subnets_filter and loadbalancer_subnets_filter options to module. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 85 +++++++++++++++++++++++++++++++---- 1 file changed, 76 insertions(+), 9 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index d7664ca1..7f3e1dce 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -14,7 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import json +import jmespath from ansible.module_utils.basic import AnsibleModule from ansible_collections.cloudera.cloud.plugins.module_utils.cdp_common import CdpModule @@ -37,7 +38,7 @@ description: - The CRN of the CDP Environment to host the Dataflow Service - The environment name can also be provided, instead of the CRN - - Required when state is present + - Required when I(state=present) type: str required: Conditional aliases: @@ -45,7 +46,7 @@ df_crn: description: - The CRN of the DataFlow Service, if available - - Required when state is absent + - Required when I(state=absent) type: str required: Conditional state: @@ -82,20 +83,40 @@ description: The IP ranges authorized to connect to the load balancer type: list required: False - kube_ip_ranges: + k8s_ip_ranges: description: The IP ranges authorized to connect to the Kubernetes API server type: list required: False cluster_subnets: description: - Subnet ids that will be assigned to the Kubernetes cluster + - Mutually exclusive with the cluster_subnets_filter option type: list required: False + cluster_subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the Kubernetes cluster + - The expression will be applied to the full list of subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the cluster_subnets option + type: str + required: False loadbalancer_subnets: description: - Subnet ids that will be assigned to the load balancer + - Mutually exclusive with the loadbalancer_subnets_filter option type: list required: False + loadbalancer_subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the load balancer + - The expression will be applied to the full list of 
subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the cluster_subnets option + type: str + required: False persist: description: Whether or not to retain the database records of related entities during removal. type: bool @@ -157,7 +178,9 @@ nodes_min: 3 nodes_max: 10 public_loadbalancer: True - kube_ip_ranges: ['192.168.0.1/24'] + cluster_subnets_filter: "[?contains(subnetName, 'pvt')]" + loadbalancer_subnets_filter: "[?contains(subnetName, 'pub')]" + k8s_ip_ranges: ['192.168.0.1/24'] state: present wait: yes @@ -273,9 +296,11 @@ def __init__(self, module): self.nodes_max = self._get_param('nodes_max') self.public_loadbalancer = self._get_param('public_loadbalancer') self.lb_ip_ranges = self._get_param('loadbalancer_ip_ranges') - self.kube_ip_ranges = self._get_param('kube_ip_ranges') + self.k8s_ip_ranges = self._get_param('k8s_ip_ranges') self.cluster_subnets = self._get_param('cluster_subnets') + self.cluster_subnets_filter = self._get_param('cluster_subnets_filter') self.lb_subnets = self._get_param('loadbalancer_subnets') + self.lb_subnets_filter = self._get_param('loadbalancer_subnets_filter') self.persist = self._get_param('persist') self.terminate = self._get_param('terminate') self.force = self._get_param('force') @@ -330,6 +355,21 @@ def process(self): self.module.fail_json(msg="Could not retrieve CRN for CDP Environment %s" % original_env_crn) else: # create DF Service + if self.cluster_subnets_filter or self.lb_subnets_filter: + try: + env_info = self.cdpy.environments.describe_environment(self.env_crn) + subnet_metadata = list(env_info['network']['subnetMetadata'].values()) + except Exception: + subnet_metadata = [] + if not subnet_metadata: + self.module.fail_json( + msg="Could not retrieve subnet metadata for CDP Environment %s" 
% self.env_crn) + + if self.cluster_subnets_filter: + self.cluster_subnets = self._filter_subnets(self.cluster_subnets_filter, subnet_metadata) + if self.lb_subnets_filter: + self.lb_subnets = self._filter_subnets(self.lb_subnets_filter, subnet_metadata) + if not self.module.check_mode: self.service = self.cdpy.df.enable_service( env_crn=self.env_crn, @@ -337,7 +377,7 @@ def process(self): max_nodes=self.nodes_max, enable_public_ip=self.public_loadbalancer, lb_ips=self.lb_ip_ranges, - kube_ips=self.kube_ip_ranges, + kube_ips=self.k8s_ip_ranges, # tags=self.tags, # Currently overstrict blocking of values cluster_subnets=self.cluster_subnets, lb_subnets=self.lb_subnets @@ -356,6 +396,27 @@ def _wait_for_enabled(self): delay=self.delay, timeout=self.timeout ) + def _filter_subnets(self, query, subnets): + """Apply a JMESPath to an array of subnets and return the id of the selected subnets. + The query must only filter the array, without applying any projection. The query result must also be an + array of subnet objects. + + :param query: JMESpath query to filter the subnet array. + :param subnets: An array of subnet objects. Each subnet in the array is an object with the following attributes: + subnetId, subnetName, availabilityZone, cidr. + :return: An array of subnet ids. 
+ """ + filtered_subnets = [] + try: + filtered_subnets = jmespath.search(query, subnets) + except Exception: + self.module.fail_json(msg="The specified subnet filter is an invalid JMESPath expression: " % query) + try: + return [s['subnetId'] for s in filtered_subnets] + except Exception: + self.module.fail_json(msg='The subnet filter "%s" should return an array of subnet objects ' + 'but instead returned this: %s' % (query, json.dumps(filtered_subnets))) + def _disable_df(self): # Attempt clean Disable, which also ensures we have tried at least once before we do a forced removal if self.target['status']['state'] in self.cdpy.sdk.REMOVABLE_STATES: @@ -410,9 +471,11 @@ def main(): nodes_max=dict(type='int', default=3, aliases=['max_k8s_node_count']), public_loadbalancer=dict(type='bool', default=False, aliases=['use_public_load_balancer']), loadbalancer_ip_ranges=dict(type='list', elements='str', default=None), - kube_ip_ranges=dict(type='list', elements='str', default=None), + k8s_ip_ranges=dict(type='list', elements='str', default=None), cluster_subnets=dict(type='list', elements='str', default=None), + cluster_subnets_filter=dict(type='str', default=None), loadbalancer_subnets=dict(type='list', elements='str', default=None), + loadbalancer_subnets_filter=dict(type='str', default=None), persist=dict(type='bool', default=False), terminate=dict(type='bool', default=False), tags=dict(required=False, type='dict', default=None), @@ -426,7 +489,11 @@ def main(): required_if=[ ('state', 'present', ('env_crn', ), False), ('state', 'absent', ('df_crn', ), False) - ] + ], + mutually_exclusive=[ + ('cluster_subnets', 'cluster_subnets_filter'), + ('loadbalancer_subnets', 'loadbalancer_subnets_filter'), + ], ) result = DFService(module) From 03f3bdbe8cc30b2af0f77a7a9f806aeca78c7a5a Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Mon, 7 Nov 2022 12:42:48 +1100 Subject: [PATCH 5/6] Added a subnet filter parameter to DataHub Cluster. 
Signed-off-by: Andre Araujo --- plugins/modules/datahub_cluster.py | 100 +++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 10 deletions(-) diff --git a/plugins/modules/datahub_cluster.py b/plugins/modules/datahub_cluster.py index 3ef27bc2..0ed86cfc 100644 --- a/plugins/modules/datahub_cluster.py +++ b/plugins/modules/datahub_cluster.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json +import jmespath from ansible.module_utils.basic import AnsibleModule from ansible_collections.cloudera.cloud.plugins.module_utils.cdp_common import CdpModule @@ -75,13 +76,30 @@ type: str required: False subnet: - description: The subnet ID in AWS, or the Subnet Name on Azure or GCP + description: + - The subnet ID in AWS, or the Subnet Name on Azure or GCP + - Mutually exclusive with the subnets and subnets_filter options + type: str + required: False samples: - Azure: fe-az-f0-sbnt-2 - AWS: subnet-0bb1c79de3EXAMPLE - GCP: fe-gc-j8-sbnt-0 + subnets: + description: + - List of subnet IDs in case of multi availability zone setup. + - Mutually exclusive with the subnet and subnets_filter options + type: list + required: False + subnets_filter: + description: + - L(JMESPath,https://jmespath.org/) expression to filter the subnets to be used for the cluster + - The expression will be applied to the full list of subnets for the specified environment + - Each subnet in the list is an object with the following attributes: subnetId, subnetName, availabilityZone, cidr + - The filter expression must only filter the list, but not apply any attribute projection + - Mutually exclusive with the subnet and subnets options + type: str + required: False image: description: ID of the image used for cluster instances type: str @@ -152,6 +170,11 @@ required: False aliases: - datahub_tags + extension: + description: + - Cluster extensions for Data Hub cluster. 
+ type: dict + required: False force: description: - Flag indicating if the datahub should be force deleted. @@ -394,11 +417,15 @@ def __init__(self, module): self.environment = self._get_param('environment') self.definition = self._get_param('definition') self.subnet = self._get_param('subnet') + self.subnets = self._get_param('subnets') + self.subnets_filter = self._get_param('subnets_filter') self.image_id = self._get_param('image') self.image_catalog = self._get_param('catalog') self.template = self._get_param('template') self.groups = self._get_param('groups') self.tags = self._get_param('tags') + self.extension = self._get_param('extension') + self.multi_az = self._get_param('multi_az') self.wait = self._get_param('wait') self.delay = self._get_param('delay') @@ -542,16 +569,40 @@ def _configure_payload(self): ) if self.definition is not None: - payload["clusterDefinitionName"]=self.definition + payload["clusterDefinitionName"] = self.definition else: - payload["image"]={"id": self.image_id, "catalogName": self.image_catalog} - payload["clusterTemplateName"]=self.template - payload["instanceGroups"]=self.groups + payload["image"] = {"id": self.image_id, "catalogName": self.image_catalog} + payload["clusterTemplateName"] = self.template + payload["instanceGroups"] = self.groups + + if self.subnets_filter: + try: + env_info = self.cdpy.environments.describe_environment(self.environment) + subnet_metadata = list(env_info['network']['subnetMetadata'].values()) + except Exception: + subnet_metadata = [] + if not subnet_metadata: + self.module.fail_json( + msg="Could not retrieve subnet metadata for CDP Environment %s" % self.environment) + + subnets = self._filter_subnets(self.subnets_filter, subnet_metadata) + if len(subnets) == 1: + self.subnet = subnets[0] + else: + self.subnets = subnets - if self.host_env['cloudPlatform'] == 'GCP': - payload['subnetName'] = self.subnet - else: - payload['subnetId'] = self.subnet + if self.subnet: + if 
self.host_env['cloudPlatform'] == 'GCP': + payload['subnetName'] = self.subnet + else: + payload['subnetId'] = self.subnet + elif self.subnets: + payload['subnetIds'] = self.subnets + + if self.extension is not None: + payload['clusterExtension'] = self.extension + + payload['multiAz'] = self.multi_az if self.tags is not None: payload['tags'] = list() @@ -560,6 +611,27 @@ def _configure_payload(self): return payload + def _filter_subnets(self, query, subnets): + """Apply a JMESPath to an array of subnets and return the id of the selected subnets. + The query must only filter the array, without applying any projection. The query result must also be an + array of subnet objects. + + :param query: JMESpath query to filter the subnet array. + :param subnets: An array of subnet objects. Each subnet in the array is an object with the following attributes: + subnetId, subnetName, availabilityZone, cidr. + :return: An array of subnet ids. + """ + filtered_subnets = [] + try: + filtered_subnets = jmespath.search(query, subnets) + except Exception: + self.module.fail_json(msg="The specified subnet filter is an invalid JMESPath expression: %s" % query) + try: + return [s['subnetId'] for s in filtered_subnets] + except Exception: + self.module.fail_json(msg='The subnet filter "%s" should return an array of subnet objects ' + 'but instead returned this: %s' % (query, json.dumps(filtered_subnets))) + def _reconcile_existing_state(self, existing): mismatched = list() @@ -594,19 +666,26 @@ def main(): state=dict(required=False, type='str', choices=['present', 'started', 'stopped', 'absent'], default='present'), definition=dict(required=False, type='str'), subnet=dict(required=False, type='str', default=None), + subnets=dict(required=False, type='list', elements='str', default=None), + subnets_filter=dict(required=False, type='str', default=None), image=dict(required=False, type='str', default=None), catalog=dict(required=False, type='str', default=None), 
template=dict(required=False, type='str', default=None), groups=dict(required=False, type='list', default=None), environment=dict(required=False, type='str', aliases=['env'], default=None), tags=dict(required=False, type='dict', aliases=['datahub_tags']), + extension=dict(required=False, type='dict'), + multi_az=dict(required=False, type='bool', default=True), force=dict(required=False, type='bool', default=False), wait=dict(required=False, type='bool', default=True), delay=dict(required=False, type='int', aliases=['polling_delay'], default=15), timeout=dict(required=False, type='int', aliases=['polling_timeout'], default=3600) ), - supports_check_mode=True + supports_check_mode=True, + mutually_exclusive=[ + ('subnet', 'subnets', 'subnets_filter'), + ], #Punting on additional checks here. There are a variety of supporting datahub invocations that can make this more complex #required_together=[ # ['subnet', 'image', 'catalog', 'template', 'groups', 'environment'], From 2af8d37c393e29e7d1aac494840061bfb68aed6f Mon Sep 17 00:00:00 2001 From: Andre Araujo Date: Wed, 4 Jan 2023 11:31:50 +1100 Subject: [PATCH 6/6] Removed non-boolean required references in doc. Signed-off-by: Andre Araujo --- plugins/modules/df_service.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/plugins/modules/df_service.py b/plugins/modules/df_service.py index 7f3e1dce..72b5afa2 100644 --- a/plugins/modules/df_service.py +++ b/plugins/modules/df_service.py @@ -40,7 +40,6 @@ - The environment name can also be provided, instead of the CRN - Required when I(state=present) type: str - required: Conditional aliases: - name df_crn: @@ -48,7 +47,6 @@ - The CRN of the DataFlow Service, if available - Required when I(state=absent) type: str - required: Conditional state: description: - The declarative state of the Dataflow Service