diff --git a/ansible/.env.example b/ansible/.env.example index d2dbb9b..32c82ed 100644 --- a/ansible/.env.example +++ b/ansible/.env.example @@ -4,11 +4,8 @@ # Tailscale Auth Key (必须) export TAILSCALE_AUTH_KEY="" -# K3s Token (添加节点时需要,从首节点安装输出获取) -export K3S_TOKEN="" - -# K3s Server URL (添加节点时需要) -export K3S_SERVER_URL="https://k3s.dev.cm:6443" +# K3s HA Server URL (添加节点时需要) +export HA_SERVER_URL="https://k3s.example.com:6443" # SSH 密码 (如果使用密码认证,必须设置;如果使用密钥认证,留空) export SSH_PASSWORD="" diff --git a/ansible/README.md b/ansible/README.md index 6730a9f..76765dc 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -7,12 +7,15 @@ ``` ansible/ ├── ansible.cfg # Ansible 配置 -├── inventory/hosts.yml # 主机清单 ⭐ 需修改 -├── group_vars/all.yml # 全局变量 -├── .env.example # 环境变量模板 +├── .ansible-lint # Lint 规则配置 +├── requirements.yml # Ansible Galaxy 依赖 +├── inventory/ +│ ├── hosts.yml # 主机清单 ⭐ 需修改 +│ └── group_vars/all.yml # 全局变量 ├── roles/ │ ├── ssh/ # SSH 安全加固 │ │ ├── tasks/main.yml +│ │ ├── handlers/main.yml │ │ └── templates/sshd_config.j2 │ ├── common/ # 基础配置 (hostname, sysctl, tailscale) │ │ ├── tasks/main.yml @@ -24,9 +27,7 @@ ansible/ │ ├── k3s-agent.yaml.j2 # Agent 配置 │ └── registries.yaml.j2 # 镜像加速 └── playbooks/ - ├── site.yml # 完整安装 - ├── init.yml # 首次安装 (含 SSH 加固) - └── add-node.yml # 添加节点 + └── site.yml # 完整安装 ``` ## 快速开始 @@ -57,8 +58,7 @@ export TAILSCALE_AUTH_KEY="tskey-auth-xxx" # 首次安装 (SSH 加固) export SSH_PASSWORD="your-root-password" -# 添加节点时 (从首节点安装输出获取) -export K3S_TOKEN="K10xxx::server:xxx" +# 单独添加节点时 (完整安装时自动获取) export K3S_SERVER_URL="https://10.0.0.1:6443" ``` @@ -74,10 +74,10 @@ ansible-playbook playbooks/site.yml --tags ssh,common,k3s,status ansible-playbook playbooks/site.yml # 方式三: 仅安装首个 master -ansible-playbook playbooks/site.yml -l master1 +ansible-playbook playbooks/site.yml -l first-master-name # 方式四: 添加新节点 -ansible-playbook playbooks/add-node.yml -l agent1 +ansible-playbook playbooks/site.yml -l new-node-name ``` ### 4. 获取 kubeconfig @@ -108,8 +108,8 @@ kubectl get nodes | 变量 | 必须 | 说明 | |------|------|------| | `TAILSCALE_AUTH_KEY` | ✅ | Tailscale Auth Key | -| `K3S_TOKEN` | 加入节点时 | 集群 Token | -| `K3S_SERVER_URL` | 加入节点时 | API Server 地址 | +| `K3S_TOKEN` | 单独添加节点时 | 集群 Token (完整安装时自动获取) | +| `K3S_SERVER_URL` | 单独添加节点时 | API Server 地址 (完整安装时自动设置) | | `SSH_PASSWORD` | 首次安装 | SSH 密码 | | `SSH_PUBKEY` | - | SSH 公钥 (默认 ~/.ssh/id_rsa.pub) | @@ -129,6 +129,18 @@ kubectl get nodes 3. 启用密钥认证 4. 自动添加本地公钥 +## 集群安装流程 + +Playbook 按以下顺序执行: + +1. **初始化节点安装**: 安装 `cluster_init: true` 的第一个 master 节点 +2. **动态获取 Token**: 从初始化节点读取 `/var/lib/rancher/k3s/server/node-token` +3. **Token 注入**: 将 K3S_TOKEN 和 K3S_SERVER_URL 设置为所有节点的 fact +4. **其他 Master 节点**: 使用动态获取的 Token 加入集群 +5. **Agent 节点**: 使用动态获取的 Token 加入集群 + +这样在一次性安装整个集群时,无需手动设置 `K3S_TOKEN` 环境变量。 + ## 常用命令 ```bash diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 219d06f..003567d 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -3,8 +3,13 @@ inventory = inventory/hosts.yml roles_path = roles host_key_checking = False retry_files_enabled = False -stdout_callback = yaml +stdout_callback = default +callbacks_enabled = ansible.builtin.default interpreter_python = auto_silent +deprecation_warnings = False + +[callback_default] +result_format = yaml [privilege_escalation] become = True @@ -14,5 +19,3 @@ become_user = root [ssh_connection] pipelining = True ssh_args = -o ControlMaster=auto -o ControlPersist=60s - - diff --git a/ansible/group_vars/all.yml b/ansible/inventory/group_vars/all.yml similarity index 81% rename from ansible/group_vars/all.yml rename to ansible/inventory/group_vars/all.yml index 561f838..45ea3a2 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/inventory/group_vars/all.yml @@ -4,14 +4,14 @@ # 敏感信息 (通过环境变量传入) # ============================================ tailscale_auth_key: "{{ lookup('env', 'TAILSCALE_AUTH_KEY') }}" -k3s_token: "{{ lookup('env', 'K3S_TOKEN') }}" -k3s_server_url: "{{ lookup('env', 'K3S_SERVER_URL') | default('https://k3s.dev.cm:6443', true) }}" +# 高可用集群的 server_url 需要指向负载均衡器地址,单节点集群则指向自身 +ha_server_url: "{{ lookup('env', 'HA_SERVER_URL') }}" # ============================================ # K3s 配置 # ============================================ -k3s_version: "v1.31.4+k3s1" -k3s_tls_san: "k3s.dev.cm,k3s.fillcode.com" +# k3s_token 和 k3s_server_url 由 site.yml 动态设置,此处仅定义版本 +k3s_version: "v1.34.2+k3s1" # ETCD 配置 etcd_snapshot_retention: 1 diff --git a/ansible/playbooks/add-node.yml b/ansible/playbooks/add-node.yml deleted file mode 100644 index f054696..0000000 --- a/ansible/playbooks/add-node.yml +++ /dev/null @@ -1,19 +0,0 @@ -# 添加新节点到集群 -# 使用: export K3S_TOKEN='xxx' K3S_SERVER_URL='xxx' TAILSCALE_AUTH_KEY='xxx' -# ansible-playbook playbooks/add-node.yml -l ---- -- name: Validate - hosts: localhost - gather_facts: no - tasks: - - ansible.builtin.fail: - msg: "请设置: export K3S_TOKEN='xxx'" - when: lookup('env', 'K3S_TOKEN') | length == 0 - -- name: Add node - hosts: masters:agents:!master_init - gather_facts: yes - roles: - - common - - k3s - diff --git a/ansible/playbooks/init.yml b/ansible/playbooks/init.yml deleted file mode 100644 index 7cc9aa2..0000000 --- a/ansible/playbooks/init.yml +++ /dev/null @@ -1,8 +0,0 @@ -# 首次安装 (包含 SSH 加固) -# 使用: export SSH_PASSWORD='xxx' TAILSCALE_AUTH_KEY='xxx' -# ansible-playbook playbooks/init.yml ---- -- name: First time installation with SSH hardening - import_playbook: site.yml - tags: [ssh, common, k3s, status] - diff --git a/ansible/playbooks/site.yml b/ansible/playbooks/site.yml index 7d36284..eac26a9 100644 --- a/ansible/playbooks/site.yml +++ b/ansible/playbooks/site.yml @@ -2,7 +2,7 @@ --- - name: Validate environment hosts: localhost - gather_facts: no + gather_facts: false tasks: - name: Check TAILSCALE_AUTH_KEY ansible.builtin.fail: @@ -23,7 +23,7 @@ # ============================================ - name: SSH Security Hardening hosts: k3s_cluster - gather_facts: no + gather_facts: false tags: [ssh, never] roles: - ssh @@ -33,7 +33,7 @@ # ============================================ - name: Common Setup hosts: k3s_cluster - gather_facts: yes + gather_facts: true tags: [common] roles: - common @@ -43,16 +43,46 @@ # ============================================ - name: Install K3s on init node hosts: masters - gather_facts: yes + gather_facts: true serial: 1 tags: [k3s] roles: - role: k3s when: cluster_init | default(false) +- name: Fetch K3S_TOKEN & K3S_SERVER_URL from init node + hosts: localhost + gather_facts: false + tags: [k3s] + tasks: + - name: Find init node + ansible.builtin.set_fact: + init_node: "{{ item }}" + loop: "{{ groups['masters'] }}" + when: hostvars[item].cluster_init | default(false) + + - name: Read K3S_TOKEN from init node + ansible.builtin.slurp: + src: /var/lib/rancher/k3s/server/node-token + register: k3s_token_content + delegate_to: "{{ init_node }}" + + - name: Determine K3S_SERVER_URL + ansible.builtin.set_fact: + # 优先使用 HA_SERVER_URL 环境变量,否则使用 init 节点地址 + k3s_server_url_or_ha: "{{ ha_server_url if (ha_server_url | length > 0) else 'https://' + hostvars[init_node].ansible_host + ':6443' }}" + + - name: Set K3S_TOKEN and K3S_SERVER_URL for all hosts + ansible.builtin.set_fact: + k3s_token: "{{ k3s_token_content.content | b64decode | trim }}" + k3s_server_url: "{{ k3s_server_url_or_ha }}" + delegate_to: "{{ item }}" + delegate_facts: true + loop: "{{ groups['k3s_cluster'] }}" + - name: Install K3s on other masters hosts: masters - gather_facts: yes + gather_facts: true serial: 1 tags: [k3s] roles: @@ -61,7 +91,7 @@ - name: Install K3s on agents hosts: agents - gather_facts: yes + gather_facts: true tags: [k3s] roles: - k3s @@ -71,15 +101,16 @@ # ============================================ - name: Show cluster status hosts: masters - gather_facts: no + gather_facts: false tags: [status] run_once: true tasks: - name: Get nodes - ansible.builtin.shell: kubectl get nodes -o wide + ansible.builtin.command: kubectl get nodes -o wide environment: KUBECONFIG: /etc/rancher/k3s/k3s.yaml register: nodes + changed_when: false when: cluster_init | default(false) - name: Display nodes @@ -90,4 +121,3 @@ {{ nodes.stdout }} ══════════════════════════════════════════════════════════════ when: cluster_init | default(false) - diff --git a/ansible/roles/common/handlers/main.yml b/ansible/roles/common/handlers/main.yml index cabea91..9e42f39 100644 --- a/ansible/roles/common/handlers/main.yml +++ b/ansible/roles/common/handlers/main.yml @@ -1,4 +1,4 @@ --- - name: Apply sysctl ansible.builtin.command: sysctl --system - + changed_when: true diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index 55ad7e6..66379db 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -19,28 +19,43 @@ content: | net.ipv4.ip_forward = 1 net.ipv6.conf.all.forwarding = 1 - mode: '0644' + mode: "0644" notify: Apply sysctl - name: Install dependencies ansible.builtin.apt: - name: [curl, wget, ca-certificates] + name: + - curl + - wget + - ca-certificates state: present - update_cache: yes + update_cache: true - name: Check if Tailscale is installed ansible.builtin.command: which tailscale - register: tailscale_check - ignore_errors: yes + register: common_tailscale_check + failed_when: false changed_when: false +- name: Download Tailscale install script + ansible.builtin.get_url: + url: https://tailscale.com/install.sh + dest: /tmp/tailscale-install.sh + mode: "0755" + when: common_tailscale_check.rc != 0 + - name: Install Tailscale - ansible.builtin.shell: curl -fsSL https://tailscale.com/install.sh | sh - when: tailscale_check.rc != 0 + ansible.builtin.command: /tmp/tailscale-install.sh + when: common_tailscale_check.rc != 0 + changed_when: true + +- name: Remove Tailscale install script + ansible.builtin.file: + path: /tmp/tailscale-install.sh + state: absent - name: Enable Tailscale service ansible.builtin.systemd: name: tailscaled - enabled: yes + enabled: true state: started - diff --git a/ansible/roles/k3s/tasks/main.yml b/ansible/roles/k3s/tasks/main.yml index e972687..b263502 100644 --- a/ansible/roles/k3s/tasks/main.yml +++ b/ansible/roles/k3s/tasks/main.yml @@ -3,69 +3,84 @@ - name: Validate TAILSCALE_AUTH_KEY ansible.builtin.fail: msg: "请设置环境变量: export TAILSCALE_AUTH_KEY='tskey-auth-xxx'" - when: tailscale_auth_key | length == 0 - -- name: Validate K3S_TOKEN for join nodes - ansible.builtin.fail: - msg: "请设置环境变量: export K3S_TOKEN='xxx'" - when: - - not (cluster_init | default(false)) - - k3s_token | length == 0 + when: (tailscale_auth_key | default('')) | length == 0 - name: Create K3s config directory ansible.builtin.file: path: /etc/rancher/k3s state: directory - mode: '0755' + mode: "0755" -# Server 节点配置 +# 部署配置文件 - name: Deploy K3s server config ansible.builtin.template: src: k3s-server.yaml.j2 dest: /etc/rancher/k3s/config.yaml - mode: '0600' + mode: "0600" when: "'masters' in group_names" -# Agent 节点配置 - name: Deploy K3s agent config ansible.builtin.template: src: k3s-agent.yaml.j2 dest: /etc/rancher/k3s/config.yaml - mode: '0600' + mode: "0600" when: "'agents' in group_names" -# 镜像加速配置 - name: Deploy registries.yaml ansible.builtin.template: src: registries.yaml.j2 dest: /etc/rancher/k3s/registries.yaml - mode: '0644' + mode: "0644" when: use_mirror | default(false) -# 安装 K3s -- name: Set install URL +# 设置安装变量 +- name: Set K3s install variables ansible.builtin.set_fact: k3s_install_url: "{{ mirror_k3s_install_url if (use_mirror | default(false)) else global_k3s_install_url }}" k3s_install_mirror: "{{ 'INSTALL_K3S_MIRROR=cn' if (use_mirror | default(false)) else '' }}" +# 检查安装状态 - name: Check if K3s is installed ansible.builtin.stat: path: /usr/local/bin/k3s register: k3s_binary +# 下载安装脚本 +- name: Download K3s install script + ansible.builtin.get_url: + url: "{{ k3s_install_url }}" + dest: /tmp/k3s-install.sh + mode: "0755" + when: not k3s_binary.stat.exists + +# 安装 K3s - name: Install K3s server - ansible.builtin.shell: | - curl -sfL {{ k3s_install_url }} | {{ k3s_install_mirror }} INSTALL_K3S_VERSION={{ k3s_version }} sh -s - server + ansible.builtin.command: + cmd: /tmp/k3s-install.sh server + environment: + INSTALL_K3S_VERSION: "{{ k3s_version }}" + INSTALL_K3S_MIRROR: "{{ 'cn' if (use_mirror | default(false)) else '' }}" when: - "'masters' in group_names" - not k3s_binary.stat.exists + changed_when: true - name: Install K3s agent - ansible.builtin.shell: | - curl -sfL {{ k3s_install_url }} | {{ k3s_install_mirror }} INSTALL_K3S_VERSION={{ k3s_version }} sh -s - agent + ansible.builtin.command: + cmd: /tmp/k3s-install.sh agent + environment: + INSTALL_K3S_VERSION: "{{ k3s_version }}" + INSTALL_K3S_MIRROR: "{{ 'cn' if (use_mirror | default(false)) else '' }}" when: - "'agents' in group_names" - not k3s_binary.stat.exists + changed_when: true + +# 清理安装脚本 +- name: Remove install script + ansible.builtin.file: + path: /tmp/k3s-install.sh + state: absent # 等待 K3s 就绪 (仅 Server) - name: Wait for K3s server ready @@ -74,30 +89,19 @@ timeout: 120 when: "'masters' in group_names" -# 输出 Token (仅 cluster-init) -- name: Get node token - ansible.builtin.slurp: - src: /var/lib/rancher/k3s/server/node-token - register: node_token - when: cluster_init | default(false) - -- name: Display node token - ansible.builtin.debug: - msg: | - ══════════════════════════════════════════════════════════════ - K3S_TOKEN (用于添加新节点): - {{ node_token.content | b64decode | trim }} - - K3S_SERVER_URL: - https://{{ ansible_host }}:6443 - ══════════════════════════════════════════════════════════════ - when: cluster_init | default(false) - # 保存 kubeconfig (仅 cluster-init) - name: Fetch kubeconfig ansible.builtin.fetch: src: /etc/rancher/k3s/k3s.yaml dest: "{{ playbook_dir }}/../kubeconfig.yaml" - flat: yes + flat: true when: cluster_init | default(false) +- name: Update kubeconfig server address + ansible.builtin.replace: + path: "{{ playbook_dir }}/../kubeconfig.yaml" + regexp: 'server: https://127\.0\.0\.1:6443' + replace: "server: {{ ha_server_url if (ha_server_url | default('') | length > 0) else 'https://' + ansible_host + ':6443' }}" + delegate_to: localhost + become: false + when: cluster_init | default(false) diff --git a/ansible/roles/k3s/templates/k3s-server.yaml.j2 b/ansible/roles/k3s/templates/k3s-server.yaml.j2 index 8936dc6..f9ccf49 100644 --- a/ansible/roles/k3s/templates/k3s-server.yaml.j2 +++ b/ansible/roles/k3s/templates/k3s-server.yaml.j2 @@ -9,8 +9,14 @@ server: "{{ k3s_server_url }}" token: "{{ k3s_token }}" {% endif %} +# TLS SAN: 包含 HA 地址 + 所有 master 节点地址 tls-san: - - "{{ k3s_tls_san }}" +{% if ha_server_url | default('') | length > 0 %} + - "{{ ha_server_url | regex_replace('^https?://([^:]+)(:[0-9]+)?$', '\\1') }}" +{% endif %} +{% for host in groups['masters'] %} + - "{{ hostvars[host].ansible_host }}" +{% endfor %} # ETCD 快照配置 etcd-snapshot-retention: {{ etcd_snapshot_retention }} diff --git a/ansible/roles/ssh/handlers/main.yml b/ansible/roles/ssh/handlers/main.yml new file mode 100644 index 0000000..9c36886 --- /dev/null +++ b/ansible/roles/ssh/handlers/main.yml @@ -0,0 +1,21 @@ +--- +- name: Restart sshd + ansible.builtin.systemd: + name: sshd + state: restarted + listen: Restart sshd + +- name: Update ansible port + ansible.builtin.set_fact: + ansible_port: "{{ ssh_new_port }}" + listen: Update ansible port + +- name: Wait for new SSH port + ansible.builtin.wait_for: + port: "{{ ssh_new_port }}" + host: "{{ ansible_host }}" + delay: 5 + timeout: 60 + delegate_to: localhost + become: false + listen: Wait for new SSH port diff --git a/ansible/roles/ssh/tasks/main.yml b/ansible/roles/ssh/tasks/main.yml index b10c947..6737a28 100644 --- a/ansible/roles/ssh/tasks/main.yml +++ b/ansible/roles/ssh/tasks/main.yml @@ -5,7 +5,7 @@ ansible.builtin.file: path: /root/.ssh state: directory - mode: '0700' + mode: "0700" - name: Add SSH public key ansible.builtin.authorized_key: @@ -17,35 +17,17 @@ ansible.builtin.copy: src: /etc/ssh/sshd_config dest: /etc/ssh/sshd_config.bak - remote_src: yes - force: no + remote_src: true + force: false + mode: "0600" - name: Deploy secure sshd_config ansible.builtin.template: src: sshd_config.j2 dest: /etc/ssh/sshd_config - mode: '0600' - validate: '/usr/sbin/sshd -t -f %s' - register: sshd_config - -- name: Restart sshd service - ansible.builtin.systemd: - name: sshd - state: restarted - when: sshd_config.changed - -- name: Update ansible_port to new SSH port - ansible.builtin.set_fact: - ansible_port: "{{ ssh_new_port }}" - when: sshd_config.changed - -- name: Wait for SSH on new port - ansible.builtin.wait_for: - port: "{{ ssh_new_port }}" - host: "{{ ansible_host }}" - delay: 5 - timeout: 60 - delegate_to: localhost - become: no - when: sshd_config.changed - + mode: "0600" + validate: "/usr/sbin/sshd -t -f %s" + notify: + - Restart sshd + - Update ansible port + - Wait for new SSH port