From 67c430fec55c9f641d810cc480fc84641e86ff66 Mon Sep 17 00:00:00 2001 From: Daniel Chaffelson Date: Mon, 20 Sep 2021 13:45:37 +0100 Subject: [PATCH 1/4] Add EL8 support to Cloudera Collections Set default CM version to 7.4.4 in cloudera.cluster Derive parcel version of el7 or el8 from inventory target OS family in cloudera.cluster.cloudera_manager.repo Ensure that Python2/3 are present on nodes when deploying on Rhel8 family OS Change cloudera-deploy defaults to el7, but allow el8 to be specified and recognised Update utility VM used for download mirror to function correctly with el7 or el8 OS Add default_cluster tag to cluster.yml playbook CM Agent heartbeat test to avoid obscure failures Signed-off-by: Daniel Chaffelson --- cluster.yml | 1 + roles/cloudera_deploy/defaults/main.yml | 2 +- .../tasks/populate_download_mirror.yml | 21 ++++++++++++++++++- .../tasks/prepare_download_mirror.yml | 3 ++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/cluster.yml b/cluster.yml index 8439680..b51d761 100644 --- a/cluster.yml +++ b/cluster.yml @@ -441,6 +441,7 @@ when: cloudera_manager_agent_wait_for_heartbeat | default(True) tags: - heartbeat + - default_cluster - full_cluster - name: Deploy Cloudera Management Service diff --git a/roles/cloudera_deploy/defaults/main.yml b/roles/cloudera_deploy/defaults/main.yml index 2331e69..e9b67c3 100644 --- a/roles/cloudera_deploy/defaults/main.yml +++ b/roles/cloudera_deploy/defaults/main.yml @@ -43,7 +43,7 @@ use_default_cluster_definition: no # Default parcel cache default_enable_download_mirror: no -default_parcel_distro: el7.parcel +default_parcel_distro: el7 default_download_link_expiry: 3600 # Default Deployment Controls diff --git a/roles/cloudera_deploy/tasks/populate_download_mirror.yml b/roles/cloudera_deploy/tasks/populate_download_mirror.yml index d75300a..2177bfc 100644 --- a/roles/cloudera_deploy/tasks/populate_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/populate_download_mirror.yml @@ -66,7 
+66,26 @@ when: globals.infra_type == 'aws' block: # Prepare to sync cache dir to S3 - - name: Prepare host for Python actions + - name: Setup System Rhel8 + when: + - ansible_os_family == 'RedHat' + - ansible_distribution_major_version | int >= 8 + become: yes + ansible.builtin.package: + lock_timeout: 180 + name: "{{ __package_item }}" + update_cache: yes + state: present + loop_control: + loop_var: __package_item + loop: + - epel-release + - python3 + + - name: Setup system Rhel7 + when: + - ansible_os_family == 'RedHat' + - ansible_distribution_major_version | int < 8 become: yes ansible.builtin.package: name: "{{ __package_item }}" diff --git a/roles/cloudera_deploy/tasks/prepare_download_mirror.yml b/roles/cloudera_deploy/tasks/prepare_download_mirror.yml index 80fed64..69db157 100644 --- a/roles/cloudera_deploy/tasks/prepare_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/prepare_download_mirror.yml @@ -43,10 +43,11 @@ - name: Extract Parcel URLs from Manifests ansible.builtin.set_fact: __parcel_urls: "{{ manifests.results | cloudera.cluster.extract_parcel_urls }}" + __parcel_distro_search_term: "{{ parcel_distro | default(default_parcel_distro) }}.parcel" - name: Filter Parcels by distro ansible.builtin.set_fact: - __filtered_parcel_urls: "{{ __parcel_urls | select('search', parcel_distro | default(default_parcel_distro)) | list }}" + __filtered_parcel_urls: "{{ __parcel_urls | select('search', __parcel_distro_search_term ) | list }}" - name: Prepare target Download Mirror listing with parcels and attendant files when: __filtered_parcel_urls | length > 0 From 936c12488fd618cb34cd04676cfe7c6a96f805dd Mon Sep 17 00:00:00 2001 From: Daniel Chaffelson Date: Fri, 24 Sep 2021 10:27:19 +0100 Subject: [PATCH 2/4] Add selectable distribution support for cloudera.cluster Enable by setting parcel_distro in definition to [el7, el8, or bionic] per dynamic OS options in cloudera.exe.infrastructure.vars Change default dynamic inventory selection strings from 
'centos7' to match the distribution identity strings of el7 etc. - user can update defaults to a different el7 distro etc. in cloudera.exe.infrastructure.vars Add Ubuntu 18.04 'bionic' as option to dynamic inventory Determine preferred parcel distribution in cloudera-deploy init Add uniqueness to generated dynamic inventory VM name to reflect selection of distribution in case multiple clusters are deployed in the same account Move dynamic inventory OS selection to globals, update appropriate reference docs to reflect change Add filtering by distro to download mirror support, and ensure that manifest is still always collected Enforce no_log always when working with Paywall credentials Increase initial paywall download timeout to 7200s due to present CDN speed issues when deploying on EC2 outside of us-east-1 Modify ansible.builtin.package lock_timeout to only be used on RedHat, as it is not a Debian option Pass selected parcel distribution to repo analysis during initial deployment when target cluster OS is not yet determined by deployment Move cloudera.cluster plays which require knowledge of the cluster distribution to run on the cloudera_manager host instead of the Ansible controller so the correct distribution actions are applied Fix extract_products_from_manifests filter in cloudera.cluster to correctly reference self and process os_distribution value Fix import ordering in cloudera.cluster filters.py to not break under recent versions of Python3 Add distribution specific tasks for cloudera.cluster.deployment.repometa so it can identify the cluster distribution using the strings recognised by Cloudera Manager deployment Force refresh of apt package cache on Debian distributions during OS prereqs setup in cloudera.cluster as the package cache in the image is sometimes missing packages Signed-off-by: Daniel Chaffelson --- cluster.yml | 6 +++--- roles/cloudera_deploy/defaults/main.yml | 2 +- roles/cloudera_deploy/tasks/init.yml | 6 ++++++
.../tasks/inject_download_mirror.yml | 18 ++++++++++++++++-- .../tasks/populate_download_mirror.yml | 8 ++++---- .../tasks/prepare_download_mirror.yml | 3 ++- 6 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cluster.yml b/cluster.yml index b51d761..e174635 100644 --- a/cluster.yml +++ b/cluster.yml @@ -69,7 +69,7 @@ - full_cluster - name: Verify definition [verify_definition] - hosts: localhost + hosts: cloudera_manager gather_facts: no roles: - cloudera.cluster.verify.definition @@ -92,7 +92,7 @@ # Moved from verify_parcels to reduce duplication - name: Verify definition [verify_parcels_and_roles] - hosts: localhost + hosts: cloudera_manager gather_facts: no roles: - cloudera.cluster.verify.parcels_and_roles @@ -467,7 +467,7 @@ - full_cluster - name: Deploy clusters - hosts: localhost + hosts: cloudera_manager gather_facts: no roles: - cloudera.cluster.deployment.cluster diff --git a/roles/cloudera_deploy/defaults/main.yml b/roles/cloudera_deploy/defaults/main.yml index e9b67c3..6ea7852 100644 --- a/roles/cloudera_deploy/defaults/main.yml +++ b/roles/cloudera_deploy/defaults/main.yml @@ -43,7 +43,7 @@ use_default_cluster_definition: no # Default parcel cache default_enable_download_mirror: no -default_parcel_distro: el7 +default_parcel_distro: el7 # el8, bionic default_download_link_expiry: 3600 # Default Deployment Controls diff --git a/roles/cloudera_deploy/tasks/init.yml b/roles/cloudera_deploy/tasks/init.yml index 39565d1..fec411c 100644 --- a/roles/cloudera_deploy/tasks/init.yml +++ b/roles/cloudera_deploy/tasks/init.yml @@ -277,6 +277,11 @@ - "Permissions are {{ __private_key_file_stat.stat.mode }}" - "Permissions should be 0400 or 0600" +# Parcel Distro +- name: Determine preferred Parcel Distribution + ansible.builtin.set_fact: + init__parcel_distro: "{{ parcel_distro | default(default_parcel_distro) }}" + # Read in Dynamic Inventory - name: Seek Inventory Template in Definition Path register: __di_template_stat @@ -329,6 +334,7 @@ - 
dynamic_inventory: vm: count: "{{ __dynamic_inventory_host_list | count }}" + os: "{{ init__parcel_distro }}" always: - name: Remove Dynamic Inventory Template from current inventory include_tasks: refresh_inventory.yml diff --git a/roles/cloudera_deploy/tasks/inject_download_mirror.yml b/roles/cloudera_deploy/tasks/inject_download_mirror.yml index 0851f70..f087f5b 100644 --- a/roles/cloudera_deploy/tasks/inject_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/inject_download_mirror.yml @@ -86,12 +86,26 @@ ansible.builtin.set_fact: init__cluster_repo_entries: "{{ init__cluster_repo_entries | default([]) + [__cluster_repo_item | urlsplit('path') ] }}" - - name: Create list of Download Mirror URLs filtered to required repositories for run + - name: Create list of Download Mirror URLs filtered to required repositories and distros loop: "{{ init__cluster_repo_entries }}" loop_control: loop_var: __cluster_repo_path_item ansible.builtin.set_fact: - init__urls_to_sign: "{{ init__urls_to_sign | default([]) + __download_mirror_ini_entry | select('search', __cluster_repo_path_item) | list }}" + init__urls_to_sign: "{{ init__urls_to_sign + | default([]) + __download_mirror_ini_entry + | select('search', __cluster_repo_path_item) + | select('search', init__parcel_distro) + | list }}" + + - name: Ensure manifest is included in Download Mirror URLs if present + loop: "{{ init__cluster_repo_entries }}" + loop_control: + loop_var: __cluster_repo_path_item + ansible.builtin.set_fact: + init__urls_to_sign: "{{ init__urls_to_sign + | default([]) + __download_mirror_ini_entry + | select('search', 'manifest.json') + | list }}" - name: Get AWS Specific download URIs when: diff --git a/roles/cloudera_deploy/tasks/populate_download_mirror.yml b/roles/cloudera_deploy/tasks/populate_download_mirror.yml index 2177bfc..34afae7 100644 --- a/roles/cloudera_deploy/tasks/populate_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/populate_download_mirror.yml @@ -28,7 +28,7 @@ loop: "{{ 
globals.download_mirror_targets }}" loop_control: loop_var: __mirror_fetch_item - async: 3600 + async: 7200 poll: 0 ansible.builtin.get_url: url: "{{ __mirror_fetch_item }}" @@ -42,8 +42,8 @@ loop_var: __download_async_item register: __async_download_results until: __async_download_results.finished is defined and __async_download_results.finished - delay: 15 - retries: 300 + delay: 30 + retries: 240 async_status: jid: "{{ __download_async_item.ansible_job_id }}" failed_when: @@ -90,7 +90,7 @@ ansible.builtin.package: name: "{{ __package_item }}" state: present - lock_timeout: 180 + lock_timeout: "{{ (ansible_os_family == 'RedHat') | ternary(180, omit) }}" loop_control: loop_var: __package_item loop: diff --git a/roles/cloudera_deploy/tasks/prepare_download_mirror.yml b/roles/cloudera_deploy/tasks/prepare_download_mirror.yml index 69db157..10ef568 100644 --- a/roles/cloudera_deploy/tasks/prepare_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/prepare_download_mirror.yml @@ -39,11 +39,12 @@ public: yes vars: repositories: "{{ __init_parcel_repos }}" + cluster_os_distribution: "{{ init__parcel_distro }}" - name: Extract Parcel URLs from Manifests ansible.builtin.set_fact: __parcel_urls: "{{ manifests.results | cloudera.cluster.extract_parcel_urls }}" - __parcel_distro_search_term: "{{ parcel_distro | default(default_parcel_distro) }}.parcel" + __parcel_distro_search_term: "{{ init__parcel_distro }}.parcel" - name: Filter Parcels by distro ansible.builtin.set_fact: From 68a2f3949b0889e4d8fcbdf36db3e1d312b72037 Mon Sep 17 00:00:00 2001 From: Daniel Chaffelson Date: Tue, 26 Oct 2021 17:41:15 +0100 Subject: [PATCH 3/4] Improve for CDH5 and Centos7 Pin boto3 version <1.18 when using Python2 for s3sync to work Set GPG to not be checked by default when deploying cm5 Set variants for cm5 and cm6/7 paths for cloudera manager URL Signed-off-by: Daniel Chaffelson --- roles/cloudera_deploy/tasks/populate_download_mirror.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/roles/cloudera_deploy/tasks/populate_download_mirror.yml b/roles/cloudera_deploy/tasks/populate_download_mirror.yml index 34afae7..65d7fa3 100644 --- a/roles/cloudera_deploy/tasks/populate_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/populate_download_mirror.yml @@ -105,7 +105,7 @@ loop_var: __pip_item loop: - futures - - boto3 >= 1.4.4 + - boto3 >= 1.4.4,<1.18 - name: Sync downloaded Files paths to S3 cache bucket become: yes From a78020a37feca0097a96659e5809f4eb2bab443b Mon Sep 17 00:00:00 2001 From: Daniel Chaffelson Date: Thu, 18 Nov 2021 15:27:00 +0000 Subject: [PATCH 4/4] Further multi os support improvements for tls Add support for Ubuntu 20.04 focal fossa Download mirror now correctly works with Ubuntu 18 or 20 Fix issue for boto3 depending on py2 or py3 Ensure os prerequisites are also applied to the ca_server node Signed-off-by: Daniel Chaffelson --- cluster.yml | 2 +- roles/cloudera_deploy/defaults/main.yml | 2 +- .../tasks/populate_download_mirror.yml | 23 ++++++++++++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/cluster.yml b/cluster.yml index e174635..4c13029 100644 --- a/cluster.yml +++ b/cluster.yml @@ -106,7 +106,7 @@ # STARTBLOCK # Prepare Nodes - name: Apply OS pre-requisite configurations - hosts: cloudera_manager, cluster + hosts: cloudera_manager, cluster, ca_server become: yes roles: - cloudera.cluster.prereqs.os diff --git a/roles/cloudera_deploy/defaults/main.yml b/roles/cloudera_deploy/defaults/main.yml index 6ea7852..35e16e0 100644 --- a/roles/cloudera_deploy/defaults/main.yml +++ b/roles/cloudera_deploy/defaults/main.yml @@ -43,7 +43,7 @@ use_default_cluster_definition: no # Default parcel cache default_enable_download_mirror: no -default_parcel_distro: el7 # el8, bionic +default_parcel_distro: el7 # el8, bionic, focal default_download_link_expiry: 3600 # Default Deployment Controls diff --git a/roles/cloudera_deploy/tasks/populate_download_mirror.yml 
b/roles/cloudera_deploy/tasks/populate_download_mirror.yml index 65d7fa3..c2681a5 100644 --- a/roles/cloudera_deploy/tasks/populate_download_mirror.yml +++ b/roles/cloudera_deploy/tasks/populate_download_mirror.yml @@ -97,6 +97,27 @@ - epel-release - python-pip + - name: Setup system Debian + when: ansible_os_family == "Debian" + block: + - name: enable Debian Repos + become: yes + apt_repository: + repo: "{{ __repo_item }}" + loop_control: + loop_var: __repo_item + loop: + - "deb http://archive.ubuntu.com/ubuntu/ {{ globals.dynamic_inventory.vm.os }} universe" + - "deb http://archive.ubuntu.com/ubuntu/ {{ globals.dynamic_inventory.vm.os }}-updates universe" + - "deb http://security.ubuntu.com/ubuntu/ {{ globals.dynamic_inventory.vm.os }}-security universe" + + - name: Install Pip on Debian + become: yes + ansible.builtin.apt: + update_cache: yes + name: python3-pip + state: present + - name: Prepare host for s3 actions become: yes ansible.builtin.pip: @@ -105,7 +126,7 @@ loop_var: __pip_item loop: - futures - - boto3 >= 1.4.4,<1.18 + - "{{ (ansible_python_version[0] == '2') | ternary('boto3 >= 1.4.4,<1.18', 'boto3 >= 1.20.0') }}" - name: Sync downloaded Files paths to S3 cache bucket become: yes