diff options
| author | Suren A. Chilingaryan <csa@suren.me> | 2019-10-06 04:37:01 +0200 | 
|---|---|---|
| committer | Suren A. Chilingaryan <csa@suren.me> | 2019-10-06 04:37:01 +0200 | 
| commit | b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26 (patch) | |
| tree | 35b03ed3f6349de7db2482761ff6295aee5ae813 | |
| parent | 1c830e285e19f2d571cf50ef912f01f0f7d68e10 (diff) | |
| download | itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.gz itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.bz2 itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.tar.xz itm-b17d3d74eb5a9e7640d94f98f6b27ce4891b3c26.zip  | |
Integration of CentOS8 and ipecompute nodes
26 files changed, 348 insertions, 31 deletions
diff --git a/docker.yml b/docker.yml new file mode 100644 index 0000000..ea91aed --- /dev/null +++ b/docker.yml @@ -0,0 +1,6 @@ +- name: Docker +  hosts: all +  remote_user: root +  roles: +    - role: docker + diff --git a/install.yml b/install.yml index 278dac9..f1acdd8 100644 --- a/install.yml +++ b/install.yml @@ -15,9 +15,29 @@    remote_user: root    roles:      - role: cuda + +# The AMD driver is "surprisingly" crashing +#- name: ROCM +#  hosts: rcom +#  remote_user: root +#  roles: +#    - role: rocm + + +- name: Docker +  hosts: docker +  remote_user: root +  roles: +    - role: docker  - name: Desktop    hosts: desktop    remote_user: root    roles:      - role: desktop + +- name: Additional Local and Network Storage +  hosts: infra +  remote_user: root +  roles: +    - role: storage diff --git a/inventories/ipe.erb b/inventories/ipe.erb index df62890..20edf72 100644 --- a/inventories/ipe.erb +++ b/inventories/ipe.erb @@ -1,6 +1,9 @@  [ands]  192.168.26.[140:149] +[compute] +192.168.26.[130:139] +  [camera]  192.168.26.[80:89] @@ -13,7 +16,21 @@ student  [cuda]  192.168.26.[80:84]  192.168.26.[86:89] +192.168.26.[131:133] + +[rocm] +192.168.26.134 + +[docker] +192.168.26.[131:139]  [ib]  192.168.26.[60:69]  192.168.26.[80:89] +192.168.26.[130:139] +192.168.26.[140:149] + +[infra] +192.168.26.[80:89] +192.168.26.[130:139] +192.168.26.[140:149] diff --git a/rocm.yml b/rocm.yml new file mode 100644 index 0000000..c76c068 --- /dev/null +++ b/rocm.yml @@ -0,0 +1,7 @@ +- name: Common Software +  hosts: all +  remote_user: root +  roles: +    - role: rocm + + diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 286a027..9f3cf79 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -5,14 +5,11 @@      - epel-release    when: ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' -- name: Add our repository with updates and overrides -  yum_repository: name="{{ item.name }}" description= "{{ item.description | default('Ands repository') }}" baseurl="{{ item.url }}" enabled="yes" gpgcheck="no" cost="{{ item.cost | default(1) }}" -  with_items: "{{ ands_repositories | default([]) }}" - -# We always update on first install and if requested -- name: Update CentOS -  yum: name=* state=latest update_cache=yes -  when: (result | changed) or (os_update | default(false)) +- include_tasks: main_yum.yml +  when: ansible_pkg_mgr == 'yum'  +   +- include_tasks: main_dnf.yml +  when: ansible_pkg_mgr == 'dnf'  - name: Install additional software    include_tasks: software.yml diff --git a/roles/common/tasks/main_dnf.yml b/roles/common/tasks/main_dnf.yml new file mode 100644 index 0000000..0572132 --- /dev/null +++ b/roles/common/tasks/main_dnf.yml @@ -0,0 +1,15 @@ +- name: Add our repository with updates and overrides +  yum_repository: name="{{ item.name }}" description= "{{ item.description | default('Ands repository') }}" baseurl="{{ item.url }}" enabled="yes" gpgcheck="no" cost="{{ item.cost | default(1) }}" +  with_items: "{{ ands_repositories | default([]) }}" + +# We always update on first install and if requested +- name: Update CentOS +  dnf: name=* state=latest +  when: (result | changed) or (os_update | default(false)) + +- name: Install various ansible requirements +  package: name={{item}} state=present +  with_items:  +    - yum-plugin-versionlock +    - python-rhsm-certificates +#    - iptables-services diff --git a/roles/common/tasks/main_yum.yml b/roles/common/tasks/main_yum.yml new file mode 100644 index 0000000..2b320d5 --- /dev/null +++ b/roles/common/tasks/main_yum.yml @@ -0,0 +1,24 @@ +- name: Add our repository with updates and overrides +  yum_repository: name="{{ item.name }}" description= "{{ item.description | default('Ands repository') }}" baseurl="{{ item.url }}" enabled="yes" gpgcheck="no" cost="{{ item.cost | default(1) }}" +  with_items: "{{ ands_repositories | default([]) }}" + +# We always update on first install and if requested +- name: Update CentOS +  yum: name=* state=latest update_cache=yes +  when: (result | changed) or (os_update | default(false)) + +- name: Install various ansible requirements +  package: name={{item}} state=present +  with_items:  +    - yum-plugin-versionlock +    - libselinux-python +    - libsemanage-python +    - yamllint +    - pyOpenSSL +    - python-passlib +    - python2-ruamel-yaml +    - python2-jmespath +    - python-ipaddress +    - iptables-services +    - PyYAML +    - python-rhsm-certificates diff --git a/roles/common/tasks/software.yml b/roles/common/tasks/software.yml index c621ef3..3a1a5c1 100644 --- a/roles/common/tasks/software.yml +++ b/roles/common/tasks/software.yml @@ -1,19 +1,3 @@ -- name: Install various ansible requirements -  package: name={{item}} state=present -  with_items:  -    - yum-plugin-versionlock -    - libselinux-python -    - libsemanage-python -    - yamllint -    - pyOpenSSL -    - python-passlib -    - python2-ruamel-yaml -    - python2-jmespath -    - python-ipaddress -    - iptables-services -    - PyYAML -    - python-rhsm-certificates -  - name: Install various administrative tools    package: name={{item}} state=present    with_items:  @@ -21,7 +5,6 @@      - telnet      - lsof      - strace -    - bzr      - git      - pciutils diff --git a/roles/cuda/vars/centos-8.yml b/roles/cuda/vars/centos-8.yml new file mode 100644 index 0000000..935e84d --- /dev/null +++ b/roles/cuda/vars/centos-8.yml @@ -0,0 +1,4 @@ +--- +cuda_repo_subfolder: rhel8 + +# vim:ft=ansible:
\ No newline at end of file diff --git a/roles/cuda/vars/redhat-8.yml b/roles/cuda/vars/redhat-8.yml new file mode 100644 index 0000000..935e84d --- /dev/null +++ b/roles/cuda/vars/redhat-8.yml @@ -0,0 +1,4 @@ +--- +cuda_repo_subfolder: rhel8 + +# vim:ft=ansible:
\ No newline at end of file diff --git a/roles/docker/defaults/main.yml b/roles/docker/defaults/main.yml new file mode 100644 index 0000000..a5bcb04 --- /dev/null +++ b/roles/docker/defaults/main.yml @@ -0,0 +1,8 @@ +--- +docker_repo_url: "https://download.docker.com/linux" +nvidia_docker_repo_url: "https://nvidia.github.io" + +nvidia_repos: +  - libnvidia-container +  - nvidia-container-runtime +  - nvidia-docker diff --git a/roles/docker/handlers/main.yml b/roles/docker/handlers/main.yml new file mode 100644 index 0000000..3eb0349 --- /dev/null +++ b/roles/docker/handlers/main.yml @@ -0,0 +1,4 @@ +--- +- name:  restart docker +  systemd: name="docker" daemon_reload="yes" state="restarted" +  become: yes diff --git a/roles/docker/tasks/configure_apt.yml b/roles/docker/tasks/configure_apt.yml new file mode 100644 index 0000000..3fd961a --- /dev/null +++ b/roles/docker/tasks/configure_apt.yml @@ -0,0 +1,15 @@ +--- +# tasks file for ansible-role-cuda +- name: Trust packaging key for Nvidia repositories (apt) +  apt_key: +    data: "{{ lookup('file', 'files/nvidia_docker_packaging_key.asc') }}" +    id: "{{ nvidia_docker_packaging_key_id }}" +    state: present + +- name: Configure Nvidia repository (apt) +  apt_repository: +    repo: "deb {{ nvidia_docker_repo_url }}/{{ nvidia_docker_repo_subfolder }} /" +    filename: nvidia_docker +    state: present + +# vim:ft=ansible: diff --git a/roles/docker/tasks/configure_dnf.yml b/roles/docker/tasks/configure_dnf.yml new file mode 100644 index 0000000..73ecb30 --- /dev/null +++ b/roles/docker/tasks/configure_dnf.yml @@ -0,0 +1,18 @@ +--- +- name: Import NVIDIA Docker repository gpg keys +  rpm_key: +    key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey" +    state: present +  with_items: "{{ nvidia_repos }}" +  when: "'cuda' in group_names" + +- name: Configure Nvidia repositories (yum) +  yum_repository: +    name: "{{ item }}" +    description: Official {{ item }} repository +    baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/" +    gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey" +    gpgcheck: no +    enabled: yes +  with_items: "{{ nvidia_repos }}" +  when: "'cuda' in group_names" diff --git a/roles/docker/tasks/configure_yum.yml b/roles/docker/tasks/configure_yum.yml new file mode 100644 index 0000000..99a2743 --- /dev/null +++ b/roles/docker/tasks/configure_yum.yml @@ -0,0 +1,38 @@ +--- +- name: Upload packaging key for docker repositories +  copy: +    src: docker_packaging_key.asc +    dest: "{{ docker_rpm_key_path }}" +    mode: 0644 + +- name: Import Docker CE repository gpg key +  rpm_key: +    key: https://download.docker.com/linux/centos/gpg +    state: present + +- name: Import NVIDIA Docker repository gpg keys +  rpm_key: +    key: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey" +    state: present +  with_items: "{{ nvidia_repos }}" + +- name: Configure docker repositories (yum) +  yum_repository: +    name: "docker-ce-{{ item }}" +    description: Official docker-ce repository +    baseurl: "{{ docker_repo_url }}/{{ docker_repo_subfolder }}/x86_64/{{ item }}" +    gpgkey: https://download.docker.com/linux/centos/gpg +    gpgcheck: yes +    enabled: yes +  with_items: +    - stable + +- name: Configure Nvidia repositories (yum) +  yum_repository: +    name: "{{ item }}" +    description: Official {{ item }} repository +    baseurl: "{{ nvidia_docker_repo_url }}/{{ item }}/{{ nvidia_docker_repo_subfolder }}/x86_64/" +    gpgkey: "{{ nvidia_docker_repo_url }}/{{ item }}/gpgkey" +    gpgcheck: no +    enabled: yes +  with_items: "{{ nvidia_repos }}" diff --git a/roles/docker/tasks/install_docker.yml b/roles/docker/tasks/install_docker.yml new file mode 100644 index 0000000..9ae0cb9 --- /dev/null +++ b/roles/docker/tasks/install_docker.yml @@ -0,0 +1,14 @@ +- name: Install requirements +  package: name="{{ item }}" state=present +  with_items: +    - lvm2 + +- name: Install nvidia docker +  package: name="nvidia-docker2" state=present +  notify: +    - restart docker + +- name: Install additional packages +  package: name="{{ item }}" state=present +  with_items: +    - docker-compose diff --git a/roles/docker/tasks/install_podman.yml b/roles/docker/tasks/install_podman.yml new file mode 100644 index 0000000..3498aa7 --- /dev/null +++ b/roles/docker/tasks/install_podman.yml @@ -0,0 +1,12 @@ +- name: Install docker packages +  package: name="{{ item }}" state=present +  with_items: +    - podman +    - buildah +    - skopeo + +- name: Install NVIDIA packages +  package: name="{{ item }}" state=present +  with_items: +    - nvidia-container-runtime +  when: "'cuda' in group_names" diff --git a/roles/docker/tasks/main.yml b/roles/docker/tasks/main.yml new file mode 100644 index 0000000..f13f99f --- /dev/null +++ b/roles/docker/tasks/main.yml @@ -0,0 +1,24 @@ +--- +- name: "Gather OS specific variables" +  include_vars: "{{ item }}" +  with_first_found: +    - "{{ ansible_distribution|lower }}-{{ ansible_distribution_version }}.yml" +    - "{{ ansible_distribution|lower }}-{{ ansible_distribution_major_version }}.yml" +    - "{{ ansible_distribution|lower }}.yml" +    - "{{ ansible_os_family|lower }}.yml" + +- include_tasks: configure_yum.yml +  when: ansible_pkg_mgr == 'yum' + +- include_tasks: configure_dnf.yml +  when: ansible_pkg_mgr == 'dnf' + +- include_tasks: configure_apt.yml +  when: ansible_pkg_mgr == 'apt' + +- include_tasks: install_docker.yml +  when: ansible_pkg_mgr == 'yum' or  ansible_pkg_mgr == 'apt' + +- include_tasks: install_podman.yml +  when: ansible_pkg_mgr == 'dnf' + diff --git a/roles/docker/vars/centos-7.yml b/roles/docker/vars/centos-7.yml new file mode 100644 index 0000000..e681468 --- /dev/null +++ b/roles/docker/vars/centos-7.yml @@ -0,0 +1,3 @@ +--- +nvidia_docker_repo_subfolder: centos7 +docker_repo_subfolder: centos/7 diff --git a/roles/docker/vars/centos-8.yml b/roles/docker/vars/centos-8.yml new file mode 100644 index 0000000..d4d24fe --- /dev/null +++ b/roles/docker/vars/centos-8.yml @@ -0,0 +1,11 @@ +--- +# While we have RHEL8 repo, in fact it references centos7 packages +# https://nvidia.github.io/nvidia-docker/rhel8.0/nvidia-docker.repo +# nvidia_docker_repo_subfolder: rhel8.0 +nvidia_docker_repo_subfolder: centos7 +#docker_repo_subfolder: centos/8 + + +nvidia_repos: +  - libnvidia-container +  - nvidia-container-runtime diff --git a/roles/rocm/tasks/main.yml b/roles/rocm/tasks/main.yml new file mode 100644 index 0000000..4ae1a87 --- /dev/null +++ b/roles/rocm/tasks/main.yml @@ -0,0 +1,20 @@ +- name: Configure DarkSoft repositories (for packages mangling provides/requires to suit ROCm) +  yum_repository: +    name: "ands_centos8" +    description: Various packages for CentOS8 +    baseurl: "http://ufo.kit.edu/ands/repos/centos8/centos8/" +    gpgcheck: no +    enabled: yes + +- name: Configure ROCm repositories (yum) +  yum_repository: +    name: "rocm" +    description: AMD ROCm Drivers and Infrastructure +    baseurl: "http://repo.radeon.com/rocm/yum/rpm/" +    gpgcheck: no +    enabled: yes + +- name: Install ROCm drivers and packages +  package: name="{{ item }}" state=present +  with_items: +    - rocm-dkms diff --git a/roles/storage/defaults/main.yml b/roles/storage/defaults/main.yml new file mode 100644 index 0000000..ca36e70 --- /dev/null +++ b/roles/storage/defaults/main.yml @@ -0,0 +1 @@ +compute4_ssds: ['sda','sdb','sdc','sdd','sde','sdf','sdg','sdh'] diff --git a/roles/storage/tasks/ipecompute2.yml b/roles/storage/tasks/ipecompute2.yml new file mode 100644 index 0000000..9b2cef8 --- /dev/null +++ b/roles/storage/tasks/ipecompute2.yml @@ -0,0 +1,17 @@ +- name: Delete partitions +  parted: device="/dev/sda" label="gpt" number="{{ item }}" state="absent" +  with_items: [ 2, 3, 4 ] + +- name: Create partition +  parted:  +    device: "/dev/sda"  +    label: "gpt"  +    number: 1  +    name: "fast"  +    state: "present"  + +- name: arrays | Creating Array(s) Filesystem +  filesystem: dev="/dev/sda1" fstype="xfs" + +- name: arrays | Mounting Array(s) +  mount: name="/mnt/fast" src="/dev/sda1" fstype="xfs" state="mounted" diff --git a/roles/storage/tasks/ipecompute4.yml b/roles/storage/tasks/ipecompute4.yml new file mode 100644 index 0000000..5b3a88f --- /dev/null +++ b/roles/storage/tasks/ipecompute4.yml @@ -0,0 +1,35 @@ +--- +#- name: Delete partitions +#  parted: device="/dev/{{ item[0] }}" label="gpt" number="{{ item[1] }}" state="absent" +#  with_nested: +#    - "{{ compute4_ssds }}" +#    - [ 2, 3, 4 ] + +- name: Create partition +  parted:  +    device: "/dev/{{ item }}"  +    label: "gpt"  +    number: 1  +    name: "softraid"  +    flags: [raid] +    state: "present"  +  failed_when: false +  with_items: "{{ compute4_ssds }}" + +- name: arrays | Checking Status Of Array(s) +  shell: "cat /proc/mdstat | grep md10" +  register: "array_check" +  changed_when: false +  failed_when: false +  check_mode: no + +- name: arrays | Creating Array(s) +  shell: "yes | mdadm --create /dev/md10 --level=0 --raid-devices={{ compute4_ssds | count }} {{ compute4_ssds | map('regex_replace', '(.*)', '/dev/\\1') | join ('1 ') }}1" +  register: "array_created" +  when: array_check.rc != 0 + +- name: arrays | Creating Array(s) Filesystem +  filesystem: dev="/dev/md10" fstype="xfs" + +- name: arrays | Mounting Array(s) +  mount: name="/mnt/fast" src="/dev/md10" fstype="xfs" state="mounted" diff --git a/roles/storage/tasks/main.yml b/roles/storage/tasks/main.yml index 871e785..014e396 100644 --- a/roles/storage/tasks/main.yml +++ b/roles/storage/tasks/main.yml @@ -1,9 +1,17 @@  --- -- name: Ensure NFS common is installed. -  package: name=nfs-utils state=present +- name: Ensure required software is installed. +  package: name="{{ item }}" state=present +  with_items: [ 'parted', 'mdadm', 'nfs-utils' ] -- name: Create mountable dir -  file: path=/mnt/ands state=directory mode=755 owner=root group=root +- debug: msg="{{ inventory_hostname }}" -- name: set mountpoints -  mount: name=/mnt/ands src=192.168.26.140:/mnt/ands fstype=nfs4 opts=defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=mounted +- name: configure network fs +  include_tasks: nfs.yml + +- name: configure ipepdvcompute2 +  include_tasks: ipecompute2.yml +  when: inventory_hostname == '192.168.26.132' + +- name: configure ipepdvcompute4 +  include_tasks: ipecompute4.yml +  when: inventory_hostname == '192.168.26.134' diff --git a/roles/storage/tasks/nfs.yml b/roles/storage/tasks/nfs.yml new file mode 100644 index 0000000..9dbd467 --- /dev/null +++ b/roles/storage/tasks/nfs.yml @@ -0,0 +1,12 @@ +--- +- name: Create mountable dir +  file: path=/mnt/ands state=directory mode=755 owner=root group=root + +- name: Create mountable dir +  file: path=/mnt/pdv state=directory mode=755 owner=root group=root + +- name: set mountpoints +  mount: name=/mnt/ands src=192.168.26.140:/mnt/ands fstype=nfs4 opts=defaults,minorversion=1,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=absent + +- name: set mountpoints +  mount: name=/mnt/pdv src=192.168.26.170:/pdv fstype=nfs opts=defaults,_netdev,nofail,soft,nodiratime,noatime dump=0 passno=0 state=mounted  | 
