diff --git a/CHANGELOG.md b/CHANGELOG.md index d3c5fd1f497ff4bc8e6a96e9c49045d99c77a0eb..88de55eab45206d233149fe094c2d0df49953ec3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). -## Unreleased +## 2.4.0 - 2023-12-21 ### Changed - changed default priority for redirect to https to be part 9999 - move metallb specific pieces from raw to metallb application - traefik doesn't use persistant volumes if acme is not enabled +- Use apt-get instead of apt in node provisioning +- Parameterize OpenStack region name ### Fixed - added pod-security on namespaces to work correctly (needed for talos) @@ -21,6 +23,14 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p ### Added - cert-manager can now be installed +- nodes are labeled with `ncsa.role` and `ncsa.flavor` from cluster.json +- added option `install_docker` to disable Docker installation when provisioning nodes +- added option `taiga_enabled` to disable Taiga actions in node provisioning +- added option `ncsa_security` to install ncsa specific security options + - disable IPv6 + - configure chrony for NCSA + - configure rsyslog for NCSA + - add qualys account ## 2.3.5 - 2023-09-09 diff --git a/README.md b/README.md index fb7f602f808033ab17a4991aab56035fb35903dd..bbe7a25f1dc1923da7109605383184b467c093b4 100644 --- a/README.md +++ b/README.md @@ -49,9 +49,26 @@ cluster: - docker installed - connected to rancher +### Definition of machines + +Create a file named `cluster.json` following the example in `cluster.example.json` and customize to define the desired set of nodes. The global cluster name is combined with the `name` value and the index of the machine to generate the individual hostnames, where the index ranges from `start_index` to `start_index + count - 1`. 
The `start_index` spec allows you to avoid name collisions while having multiple machine configurations following the same sequential naming convention. + +For example, if the cluster name is `k8s`, then the `cluster.example.json` file would generate the following list of machines: + +```plain +k8s-controlplane-01 (gp.medium, 40GB disk) +k8s-controlplane-02 (gp.medium, 40GB disk) +k8s-controlplane-03 (gp.medium, 40GB disk) +k8s-worker-01 (gp.xlarge, 80GB disk) +k8s-worker-02 (gp.xlarge, 80GB disk) +k8s-worker-03 (m1.xlarge, 60GB disk) +``` + + + ## RKE2 (terraform/modules/rke2) -This module is not supported yet, wil create an RKE2 cluster +This module is not supported yet, will create an RKE2 cluster ## compute/openstack and rancher diff --git a/terraform/modules/rke1/cluster.example.json b/terraform/modules/rke1/cluster.example.json new file mode 100644 index 0000000000000000000000000000000000000000..8a198c31248dfb06181e6730c760b5b719f62f23 --- /dev/null +++ b/terraform/modules/rke1/cluster.example.json @@ -0,0 +1,25 @@ +{ + "machines": [ + { + "name": "controlplane", + "role": "controlplane", + "flavor": "gp.medium", + "os": "ubuntu", + "count": 3 + }, + { + "name": "worker", + "flavor": "gp.xlarge", + "os": "ubuntu", + "disk": 80, + "count": 2 + }, + { + "name": "worker", + "flavor": "m1.xlarge", + "os": "ubuntu", + "disk": 60, + "start_index": 3 + } + ] +} diff --git a/terraform/modules/rke1/nodes.tf b/terraform/modules/rke1/nodes.tf index a6698d605f6930d0622f3d0a9c0df8ab541aa0ff..b1c87a94b3e654e7b747342ab7d7d968b0590744 100644 --- a/terraform/modules/rke1/nodes.tf +++ b/terraform/modules/rke1/nodes.tf @@ -11,8 +11,8 @@ locals { machines = flatten([ for x in var.cluster_machines : [ - for i in range(x.count == null ? 1 : x.count) : { - hostname = format("%s-%s-%02d", var.cluster_name, x.name, (i + 1)) + for i in range(contains(keys(x), "count") ? x.count : 1) : { + hostname = format("%s-%s-%02d", var.cluster_name, x.name, (i + (contains(keys(x), "start_index") ? 
x.start_index : 1))) username = lookup(local.usernames, x.os, "UNDEFINED") image_name = lookup(var.openstack_os_image, x.os, "UNDEFINED") flavor = try(x.flavor, "gp.medium") @@ -21,7 +21,7 @@ locals { zone = try(x.zone, "nova") role = try(x.role, "worker") floating_ip = try(x.floating_ip, can(x.role == "controlplane")) - labels = flatten([x.name, try(x.labels, [])]) + labels = flatten([format("ncsa.role=%s", x.name), format("ncsa.flavor=%s", try(x.flavor, "gp.medium")), try(x.labels, [])]) } ] ]) @@ -66,14 +66,17 @@ resource "openstack_compute_instance_v2" "machine" { } user_data = base64encode(templatefile("${path.module}/templates/user_data.tmpl", { - private_key = openstack_compute_keypair_v2.key.private_key - project_name = data.openstack_identity_auth_scope_v3.scope.project_name - cluster_name = var.cluster_name - username = each.value.username - node_name = each.value.hostname - node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command - node_options = lookup(local.node_options, each.value.role, "--worker") - node_labels = join(" ", [for l in each.value.labels : format("-l %s", replace(l, " ", "_"))]) + private_key = openstack_compute_keypair_v2.key.private_key + project_name = data.openstack_identity_auth_scope_v3.scope.project_name + cluster_name = var.cluster_name + username = each.value.username + node_name = each.value.hostname + node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command + node_options = lookup(local.node_options, each.value.role, "--worker") + node_labels = join(" ", [for l in each.value.labels : format("-l %s", replace(l, " ", "_"))]) + ncsa_security = var.ncsa_security + taiga_enabled = var.taiga_enabled + install_docker = var.install_docker })) lifecycle { @@ -113,14 +116,17 @@ resource "openstack_compute_instance_v2" "controlplane" { #%{ endfor } user_data = base64encode(templatefile("${path.module}/templates/user_data.tmpl", { - private_key = openstack_compute_keypair_v2.key.private_key - 
project_name = data.openstack_identity_auth_scope_v3.scope.project_name - cluster_name = var.cluster_name - username = "centos" - node_name = local.controlplane[count.index] - node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command - node_options = "--address awspublic --internal-address awslocal --controlplane --etcd" - node_labels = "" + private_key = openstack_compute_keypair_v2.key.private_key + project_name = data.openstack_identity_auth_scope_v3.scope.project_name + cluster_name = var.cluster_name + username = "centos" + node_name = local.controlplane[count.index] + node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command + node_options = "--address awspublic --internal-address awslocal --controlplane --etcd" + node_labels = "" + ncsa_security = false + taiga_enabled = var.taiga_enabled + install_docker = var.install_docker })) block_device { @@ -167,14 +173,17 @@ resource "openstack_compute_instance_v2" "worker" { ] user_data = base64encode(templatefile("${path.module}/templates/user_data.tmpl", { - private_key = openstack_compute_keypair_v2.key.private_key - project_name = data.openstack_identity_auth_scope_v3.scope.project_name - cluster_name = var.cluster_name - node_name = local.worker[count.index] - username = "centos" - node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command - node_options = "--worker" - node_labels = "" + private_key = openstack_compute_keypair_v2.key.private_key + project_name = data.openstack_identity_auth_scope_v3.scope.project_name + cluster_name = var.cluster_name + node_name = local.worker[count.index] + username = "centos" + node_command = rancher2_cluster.kube.cluster_registration_token.0.node_command + node_options = "--worker" + node_labels = "" + ncsa_security = false + taiga_enabled = var.taiga_enabled + install_docker = var.install_docker })) block_device { diff --git a/terraform/modules/rke1/providers.tf b/terraform/modules/rke1/providers.tf index 
0fc99bb4bb92769f39b2b02d7c1a5c10b73899f2..6945b7c9d4fda8aafbed7e8c8cb29e6dee50df21 100644 --- a/terraform/modules/rke1/providers.tf +++ b/terraform/modules/rke1/providers.tf @@ -1,6 +1,6 @@ provider "openstack" { auth_url = var.openstack_url - region = "RegionOne" + region = var.openstack_region_name application_credential_id = var.openstack_credential_id application_credential_secret = var.openstack_credential_secret } diff --git a/terraform/modules/rke1/security_group.tf b/terraform/modules/rke1/security_group.tf index 90ba604813f674e0b5d601590575524871a20954..8931abb2880f80d3414c4f47d39de67c0744bb3f 100644 --- a/terraform/modules/rke1/security_group.tf +++ b/terraform/modules/rke1/security_group.tf @@ -88,6 +88,19 @@ resource "openstack_networking_secgroup_rule_v2" "ingress_kubeapi" { depends_on = [openstack_networking_secgroup_v2.cluster_security_group] } +resource "openstack_networking_secgroup_rule_v2" "custom" { + for_each = var.openstack_security_custom + description = "custom ${each.key}" + direction = "ingress" + ethertype = "IPv4" + protocol = "tcp" + port_range_min = each.value.port_range_min + port_range_max = each.value.port_range_max + remote_ip_prefix = each.value.remote_ip_prefix + security_group_id = openstack_networking_secgroup_v2.cluster_security_group.id + depends_on = [openstack_networking_secgroup_v2.cluster_security_group] +} + resource "openstack_networking_secgroup_rule_v2" "same_security_group_ingress_tcp" { direction = "ingress" ethertype = "IPv4" diff --git a/terraform/modules/rke1/templates/user_data.tmpl b/terraform/modules/rke1/templates/user_data.tmpl index 3ee73cf09ca10b2bee0709883d5c62bfe78a5b70..7012728a84079b812d3c145821fbb329546338f8 100644 --- a/terraform/modules/rke1/templates/user_data.tmpl +++ b/terraform/modules/rke1/templates/user_data.tmpl @@ -9,17 +9,46 @@ ssh: package_update: true package_upgrade: true +# install some packages +packages: + - chrony +%{ if username == "centos" } + - iscsi-initiator-utils + - nfs-utils 
+%{ endif } +%{ if username == "ubuntu" } + - open-iscsi + - nfs-common +%{ endif } +%{ if ncsa_security } + - rsyslog-relp +%{ endif } + +users: + - default +%{ if ncsa_security } + - name: qualys + gecos: Qualys Service + groups: users + system: true + shell: /bin/bash + ssh_authorized_keys: + - ecdsa-sha2-nistp521 AAAAE2VjZHNhLXNoYTItbmlzdHA1MjEAAAAIbmlzdHA1MjEAAACFBAGAwkmzfc0NyhjOdi1qfI5SVQ0prU1luu24xUNeEyEvH9CX80hmXt+ZnQt8Dc7HExUXDcSZo25g71WnuvlYbZefBgHkOLY5JpDcTGuQcb7W6CXD9UG7Unu4YbmBErQhs3u2iuNLYCDxAhoVvfK4Op/sNvMKME72KM3hQ6GE+H1QD8xZZA== +%{ endif } + # set timezone timezone: America/Chicago # files to be created on the system write_files: +%{ if taiga_enabled ~} - path: /etc/fstab permissions: "0644" owner: root:root content: | taiga-nfs.ncsa.illinois.edu:/taiga/ncsa/radiant/${project_name}/${cluster_name} /taiga nfs defaults 0 0 append: true +%{ endif ~} - path: /etc/docker/daemon.json permissions: "0644" owner: root:root @@ -32,6 +61,48 @@ write_files: }, "storage-driver": "overlay2" } +%{ if ncsa_security } +- path: /etc/rsyslog.d/00-ncsa.conf + permissions: "0644" + owner: root:root + content: | + # Load Output RELP module (at top) + $ModLoad omrelp + $WorkDirectory /var/spool/rsyslog # Directory to store buffer files (must exist!) 
+ $ActionQueueType LinkedList # use asynchronous processing + $ActionQueueFileName syslog-security-buffer # set file name, also enables disk mode + $ActionQueueMaxDiskSpace 10g # space limit (use as much as possible) + $ActionResumeRetryCount -1 # infinite retries on insert failure + $ActionQueueSaveOnShutdown on # save in-memory data if rsyslog shuts down + *.* :omrelp:syslog.security.ncsa.illinois.edu:1514 +%{ endif } +%{ if ncsa_security } +- path: /etc/sysctl.d/50-disable-ipv6.conf + permissions: "0644" + owner: root:root + content: | + net.ipv6.conf.all.disable_ipv6 = 1 + net.ipv6.conf.default.disable_ipv6 = 1 +%{ endif } +%{ if username == "ubuntu" } +- path: /etc/sysctl.d/50-increase-inotify.conf + permissions: "0644" + owner: root:root + content: | + fs.inotify.max_user_instances=8192 +%{ endif } +%{ if ncsa_security } +%{ if username == "ubuntu" } +- path: /etc/chrony/sources.d/ncsa.sources + permissions: "0644" + owner: root:root + content: | + # University of Illinois NTP Servers + pool ntp.illinois.edu iburst maxsources 3 + # NCSA's NTP Server + pool ntp.ncsa.illinois.edu iburst maxsources 2 +%{ endif } +%{ endif } - path: /usr/local/bin/rke1 permissions: "0700" owner: root:root @@ -39,26 +110,38 @@ write_files: #!/usr/bin/bash echo "sleeping to wait for network" while ! 
ping -c 1 -w 0 1.1.1.1 > /dev/null ; do echo "Sleep 10s"; sleep 10; done - echo "install iscsi/nfs" - if [ -e /usr/bin/yum ]; then - yum -y install iscsi-initiator-utils nfs-utils - elif [ -e /usr/bin/apt ]; then - apt install -y open-iscsi nfs-common - else - echo "Don't know how to install iscsi/nfs" - fi +%{ if ncsa_security } + sysctl -w net.ipv6.conf.all.disable_ipv6=1 + sysctl -w net.ipv6.conf.default.disable_ipv6=1 + sysctl -w net.ipv6.route.flush=1 +%{ if username == "ubuntu" } + systemctl disable --now rpcbind + systemctl disable --now rpc-statd +%{ endif } +%{ endif } +%{ if username == "ubuntu" } + sysctl fs.inotify.max_user_instances=8192 +%{ endif } +%{ if install_docker ~} echo "install docker" curl https://releases.rancher.com/install-docker/24.0.sh | sh - systemctl enable docker - systemctl start docker + if [ -e /usr/bin/apt-get ]; then apt-get -qq update; fi # apt-get exists only on Debian/Ubuntu images, skip on CentOS + if [ -e /usr/bin/apt-get ]; then apt-get -y dist-upgrade; fi + systemctl enable --now docker usermod -aG docker ${username} +%{ endif ~} echo "connect to rancher" ${node_command} ${node_options} ${node_labels} +%{ if taiga_enabled ~} echo "mounting taiga" mkdir /taiga - #mount -av +%{ endif ~} echo "all done" # run this command once the system is booted runcmd: - /usr/local/bin/rke1 + +power_state: + delay: "+5" + mode: reboot diff --git a/terraform/modules/rke1/variables.tf b/terraform/modules/rke1/variables.tf index 589c5375e7fcd99ee00f0c1c23173a542511cc34..2aac442f744a8d19f23b6905b8538a8a049af991 100644 --- a/terraform/modules/rke1/variables.tf +++ b/terraform/modules/rke1/variables.tf @@ -110,6 +110,12 @@ variable "openstack_url" { default = "https://radiant.ncsa.illinois.edu" } +variable "openstack_region_name" { + type = string + description = "OpenStack region name" + default = "RegionOne" +} + variable "openstack_credential_id" { type = string sensitive = true @@ -161,6 +167,13 @@ variable "openstack_security_ssh" { } } +variable "openstack_security_custom" { + type = map(any) + description = "ports to open for custom services to the world, assumed these 
are blocked in other ways" + default = { + } +} + variable "openstack_os_image" { type = map(any) description = "Map from short OS name to image" @@ -251,3 +264,25 @@ variable "floating_ip" { description = "Number of floating IP addresses available for loadbalancers" default = 2 } + +# ---------------------------------------------------------------------- +# NODE CREATION OPTIONS +# ---------------------------------------------------------------------- + +variable "ncsa_security" { + type = bool + description = "Install NCSA security options, for example rsyslog" + default = false +} + +variable "taiga_enabled" { + type = bool + description = "Enable Taiga mount" + default = true +} + +variable "install_docker" { + type = bool + description = "Install Docker when provisioning node" + default = true +}