feat(ansible): ansible 初步稳定

这个提交包含在:
rohow
2026-02-10 10:25:28 +08:00
未验证
父节点 d6bcd22ecd
当前提交 acd50f7093
修改 13 个文件,包含 183 行新增和 140 行删除
+2 -5
查看文件
@@ -4,11 +4,8 @@
# Tailscale Auth Key (必须)
export TAILSCALE_AUTH_KEY=""
# K3s Token (添加节点时需要,从首节点安装输出获取)
export K3S_TOKEN=""
# K3s Server URL (添加节点时需要)
export K3S_SERVER_URL="https://k3s.dev.cm:6443"
# K3s HA Server URL (添加节点时需要)
export HA_SERVER_URL="https://k3s.example.com:6443"
# SSH 密码 (如果使用密码认证,必须设置;如果使用密钥认证,留空)
export SSH_PASSWORD=""
+24 -12
查看文件
@@ -7,12 +7,15 @@
```
ansible/
├── ansible.cfg # Ansible 配置
├── inventory/hosts.yml # 主机清单 ⭐ 需修改
├── group_vars/all.yml # 全局变量
├── .env.example # 环境变量模板
├── .ansible-lint # Lint 规则配置
├── requirements.yml # Ansible Galaxy 依赖
├── inventory/
│ ├── hosts.yml # 主机清单 ⭐ 需修改
│ └── group_vars/all.yml # 全局变量
├── roles/
│ ├── ssh/ # SSH 安全加固
│ │ ├── tasks/main.yml
│ │ ├── handlers/main.yml
│ │ └── templates/sshd_config.j2
│ ├── common/ # 基础配置 (hostname, sysctl, tailscale)
│ │ ├── tasks/main.yml
@@ -24,9 +27,7 @@ ansible/
│ ├── k3s-agent.yaml.j2 # Agent 配置
│ └── registries.yaml.j2 # 镜像加速
└── playbooks/
├── site.yml # 完整安装
├── init.yml # 首次安装 (含 SSH 加固)
└── add-node.yml # 添加节点
└── site.yml # 完整安装
```
## 快速开始
@@ -57,8 +58,7 @@ export TAILSCALE_AUTH_KEY="tskey-auth-xxx"
# 首次安装 (SSH 加固)
export SSH_PASSWORD="your-root-password"
# 添加节点时 (从首节点安装输出获取)
export K3S_TOKEN="K10xxx::server:xxx"
# 单独添加节点时 (完整安装时自动获取)
export K3S_SERVER_URL="https://10.0.0.1:6443"
```
@@ -74,10 +74,10 @@ ansible-playbook playbooks/site.yml --tags ssh,common,k3s,status
ansible-playbook playbooks/site.yml
# 方式三: 仅安装首个 master
ansible-playbook playbooks/site.yml -l master1
ansible-playbook playbooks/site.yml -l first-master-name
# 方式四: 添加新节点
ansible-playbook playbooks/add-node.yml -l agent1
ansible-playbook playbooks/site.yml -l new-node-name
```
### 4. 获取 kubeconfig
@@ -108,8 +108,8 @@ kubectl get nodes
| 变量 | 必须 | 说明 |
|------|------|------|
| `TAILSCALE_AUTH_KEY` | ✅ | Tailscale Auth Key |
| `K3S_TOKEN` | 加节点时 | 集群 Token |
| `K3S_SERVER_URL` | 加节点时 | API Server 地址 |
| `K3S_TOKEN` | 单独添加节点时 | 集群 Token (完整安装时自动获取) |
| `K3S_SERVER_URL` | 单独添加节点时 | API Server 地址 (完整安装时自动设置) |
| `SSH_PASSWORD` | 首次安装 | SSH 密码 |
| `SSH_PUBKEY` | - | SSH 公钥 (默认 ~/.ssh/id_rsa.pub) |
@@ -129,6 +129,18 @@ kubectl get nodes
3. 启用密钥认证
4. 自动添加本地公钥
## 集群安装流程
Playbook 按以下顺序执行:
1. **初始化节点安装**: 安装 `cluster_init: true` 的第一个 master 节点
2. **动态获取 Token**: 从初始化节点读取 `/var/lib/rancher/k3s/server/node-token`
3. **Token 注入**: 将 K3S_TOKEN 和 K3S_SERVER_URL 设置为所有节点的 fact
4. **其他 Master 节点**: 使用动态获取的 Token 加入集群
5. **Agent 节点**: 使用动态获取的 Token 加入集群
这样在一次性安装整个集群时,无需手动设置 `K3S_TOKEN` 环境变量。
## 常用命令
```bash
+6 -3
查看文件
@@ -3,8 +3,13 @@ inventory = inventory/hosts.yml
roles_path = roles
host_key_checking = False
retry_files_enabled = False
stdout_callback = yaml
stdout_callback = default
callbacks_enabled = ansible.builtin.default
interpreter_python = auto_silent
deprecation_warnings = False
[callback_default]
result_format = yaml
[privilege_escalation]
become = True
@@ -14,5 +19,3 @@ become_user = root
[ssh_connection]
pipelining = True
ssh_args = -o ControlMaster=auto -o ControlPersist=60s
@@ -4,14 +4,14 @@
# 敏感信息 (通过环境变量传入)
# ============================================
tailscale_auth_key: "{{ lookup('env', 'TAILSCALE_AUTH_KEY') }}"
k3s_token: "{{ lookup('env', 'K3S_TOKEN') }}"
k3s_server_url: "{{ lookup('env', 'K3S_SERVER_URL') | default('https://k3s.dev.cm:6443', true) }}"
# 高可用集群的 server_url 需要指向负载均衡器地址,单节点集群则指向自身
ha_server_url: "{{ lookup('env', 'HA_SERVER_URL') }}"
# ============================================
# K3s 配置
# ============================================
k3s_version: "v1.31.4+k3s1"
k3s_tls_san: "k3s.dev.cm,k3s.fillcode.com"
# k3s_token 和 k3s_server_url 由 site.yml 动态设置,此处仅定义版本
k3s_version: "v1.34.2+k3s1"
# ETCD 配置
etcd_snapshot_retention: 1
-19
查看文件
@@ -1,19 +0,0 @@
# 添加新节点到集群
# 使用: export K3S_TOKEN='xxx' K3S_SERVER_URL='xxx' TAILSCALE_AUTH_KEY='xxx'
# ansible-playbook playbooks/add-node.yml -l <node_name>
---
- name: Validate
hosts: localhost
gather_facts: no
tasks:
- ansible.builtin.fail:
msg: "请设置: export K3S_TOKEN='xxx'"
when: lookup('env', 'K3S_TOKEN') | length == 0
- name: Add node
hosts: masters:agents:!master_init
gather_facts: yes
roles:
- common
- k3s
-8
查看文件
@@ -1,8 +0,0 @@
# 首次安装 (包含 SSH 加固)
# 使用: export SSH_PASSWORD='xxx' TAILSCALE_AUTH_KEY='xxx'
# ansible-playbook playbooks/init.yml
---
- name: First time installation with SSH hardening
import_playbook: site.yml
tags: [ssh, common, k3s, status]
+39 -9
查看文件
@@ -2,7 +2,7 @@
---
- name: Validate environment
hosts: localhost
gather_facts: no
gather_facts: false
tasks:
- name: Check TAILSCALE_AUTH_KEY
ansible.builtin.fail:
@@ -23,7 +23,7 @@
# ============================================
- name: SSH Security Hardening
hosts: k3s_cluster
gather_facts: no
gather_facts: false
tags: [ssh, never]
roles:
- ssh
@@ -33,7 +33,7 @@
# ============================================
- name: Common Setup
hosts: k3s_cluster
gather_facts: yes
gather_facts: true
tags: [common]
roles:
- common
@@ -43,16 +43,46 @@
# ============================================
- name: Install K3s on init node
hosts: masters
gather_facts: yes
gather_facts: true
serial: 1
tags: [k3s]
roles:
- role: k3s
when: cluster_init | default(false)
# Publish the cluster join credentials (k3s_token / k3s_server_url) as host
# facts for the plays that follow.
#
# The play targets the cluster hosts instead of localhost: when a run is
# restricted with `--limit <node>`, a play whose hosts fall outside the limit
# is skipped, so a localhost-only play would never set the facts on a node
# that is being added on its own. The token is still read from the init node
# through delegate_to.
- name: Fetch K3S_TOKEN & K3S_SERVER_URL from init node
  hosts: k3s_cluster
  gather_facts: false
  tags: [k3s]
  tasks:
    # Every master flagged with cluster_init overwrites the fact in loop
    # order, so the last flagged entry of groups['masters'] is the one kept.
    - name: Find init node
      ansible.builtin.set_fact:
        init_node: "{{ item }}"
      loop: "{{ groups['masters'] }}"
      when: hostvars[item].cluster_init | default(false) | bool

    # Stop with a readable message instead of an undefined-variable error
    # in the delegated task below.
    - name: Ensure an init node was found
      ansible.builtin.assert:
        that:
          - init_node is defined
        fail_msg: "No host in group 'masters' sets cluster_init: true"
        quiet: true

    # Read once on the init node; the registered result is shared with the
    # other hosts of the play. no_log keeps the token out of the output.
    - name: Read K3S_TOKEN from init node
      ansible.builtin.slurp:
        src: /var/lib/rancher/k3s/server/node-token
      register: k3s_token_content
      delegate_to: "{{ init_node }}"
      run_once: true
      no_log: true

    # Prefer ha_server_url when it is set and non-empty; otherwise point at
    # the init node, falling back to its inventory name when ansible_host is
    # not defined for it.
    - name: Set K3S_TOKEN and K3S_SERVER_URL for all hosts
      ansible.builtin.set_fact:
        k3s_token: "{{ k3s_token_content.content | b64decode | trim }}"
        k3s_server_url: "{{ ha_server_url if (ha_server_url | default('') | length > 0) else 'https://' + (hostvars[init_node].ansible_host | default(init_node)) + ':6443' }}"
      no_log: true
- name: Install K3s on other masters
hosts: masters
gather_facts: yes
gather_facts: true
serial: 1
tags: [k3s]
roles:
@@ -61,7 +91,7 @@
- name: Install K3s on agents
hosts: agents
gather_facts: yes
gather_facts: true
tags: [k3s]
roles:
- k3s
@@ -71,15 +101,16 @@
# ============================================
- name: Show cluster status
hosts: masters
gather_facts: no
gather_facts: false
tags: [status]
run_once: true
tasks:
- name: Get nodes
ansible.builtin.shell: kubectl get nodes -o wide
ansible.builtin.command: kubectl get nodes -o wide
environment:
KUBECONFIG: /etc/rancher/k3s/k3s.yaml
register: nodes
changed_when: false
when: cluster_init | default(false)
- name: Display nodes
@@ -90,4 +121,3 @@
{{ nodes.stdout }}
══════════════════════════════════════════════════════════════
when: cluster_init | default(false)
+1 -1
查看文件
@@ -1,4 +1,4 @@
---
- name: Apply sysctl
ansible.builtin.command: sysctl --system
changed_when: true
+24 -9
查看文件
@@ -19,28 +19,43 @@
content: |
net.ipv4.ip_forward = 1
net.ipv6.conf.all.forwarding = 1
mode: '0644'
mode: "0644"
notify: Apply sysctl
- name: Install dependencies
ansible.builtin.apt:
name: [curl, wget, ca-certificates]
name:
- curl
- wget
- ca-certificates
state: present
update_cache: yes
update_cache: true
- name: Check if Tailscale is installed
ansible.builtin.command: which tailscale
register: tailscale_check
ignore_errors: yes
register: common_tailscale_check
failed_when: false
changed_when: false
- name: Download Tailscale install script
ansible.builtin.get_url:
url: https://tailscale.com/install.sh
dest: /tmp/tailscale-install.sh
mode: "0755"
when: common_tailscale_check.rc != 0
- name: Install Tailscale
ansible.builtin.shell: curl -fsSL https://tailscale.com/install.sh | sh
when: tailscale_check.rc != 0
ansible.builtin.command: /tmp/tailscale-install.sh
when: common_tailscale_check.rc != 0
changed_when: true
- name: Remove Tailscale install script
ansible.builtin.file:
path: /tmp/tailscale-install.sh
state: absent
- name: Enable Tailscale service
ansible.builtin.systemd:
name: tailscaled
enabled: yes
enabled: true
state: started
+45 -41
查看文件
@@ -3,69 +3,84 @@
- name: Validate TAILSCALE_AUTH_KEY
ansible.builtin.fail:
msg: "请设置环境变量: export TAILSCALE_AUTH_KEY='tskey-auth-xxx'"
when: tailscale_auth_key | length == 0
- name: Validate K3S_TOKEN for join nodes
ansible.builtin.fail:
msg: "请设置环境变量: export K3S_TOKEN='xxx'"
when:
- not (cluster_init | default(false))
- k3s_token | length == 0
when: (tailscale_auth_key | default('')) | length == 0
- name: Create K3s config directory
ansible.builtin.file:
path: /etc/rancher/k3s
state: directory
mode: '0755'
mode: "0755"
# Server 节点配置
# 部署配置文件
- name: Deploy K3s server config
ansible.builtin.template:
src: k3s-server.yaml.j2
dest: /etc/rancher/k3s/config.yaml
mode: '0600'
mode: "0600"
when: "'masters' in group_names"
# Agent 节点配置
- name: Deploy K3s agent config
ansible.builtin.template:
src: k3s-agent.yaml.j2
dest: /etc/rancher/k3s/config.yaml
mode: '0600'
mode: "0600"
when: "'agents' in group_names"
# 镜像加速配置
- name: Deploy registries.yaml
ansible.builtin.template:
src: registries.yaml.j2
dest: /etc/rancher/k3s/registries.yaml
mode: '0644'
mode: "0644"
when: use_mirror | default(false)
# 安装 K3s
- name: Set install URL
# 设置安装变量
- name: Set K3s install variables
ansible.builtin.set_fact:
k3s_install_url: "{{ mirror_k3s_install_url if (use_mirror | default(false)) else global_k3s_install_url }}"
k3s_install_mirror: "{{ 'INSTALL_K3S_MIRROR=cn' if (use_mirror | default(false)) else '' }}"
# 检查安装状态
- name: Check if K3s is installed
ansible.builtin.stat:
path: /usr/local/bin/k3s
register: k3s_binary
# 下载安装脚本
- name: Download K3s install script
ansible.builtin.get_url:
url: "{{ k3s_install_url }}"
dest: /tmp/k3s-install.sh
mode: "0755"
when: not k3s_binary.stat.exists
# 安装 K3s
- name: Install K3s server
ansible.builtin.shell: |
curl -sfL {{ k3s_install_url }} | {{ k3s_install_mirror }} INSTALL_K3S_VERSION={{ k3s_version }} sh -s - server
ansible.builtin.command:
cmd: /tmp/k3s-install.sh server
environment:
INSTALL_K3S_VERSION: "{{ k3s_version }}"
INSTALL_K3S_MIRROR: "{{ 'cn' if (use_mirror | default(false)) else '' }}"
when:
- "'masters' in group_names"
- not k3s_binary.stat.exists
changed_when: true
- name: Install K3s agent
ansible.builtin.shell: |
curl -sfL {{ k3s_install_url }} | {{ k3s_install_mirror }} INSTALL_K3S_VERSION={{ k3s_version }} sh -s - agent
ansible.builtin.command:
cmd: /tmp/k3s-install.sh agent
environment:
INSTALL_K3S_VERSION: "{{ k3s_version }}"
INSTALL_K3S_MIRROR: "{{ 'cn' if (use_mirror | default(false)) else '' }}"
when:
- "'agents' in group_names"
- not k3s_binary.stat.exists
changed_when: true
# 清理安装脚本
- name: Remove install script
ansible.builtin.file:
path: /tmp/k3s-install.sh
state: absent
# 等待 K3s 就绪 (仅 Server)
- name: Wait for K3s server ready
@@ -74,30 +89,19 @@
timeout: 120
when: "'masters' in group_names"
# 输出 Token (仅 cluster-init)
- name: Get node token
ansible.builtin.slurp:
src: /var/lib/rancher/k3s/server/node-token
register: node_token
when: cluster_init | default(false)
- name: Display node token
ansible.builtin.debug:
msg: |
══════════════════════════════════════════════════════════════
K3S_TOKEN (用于添加新节点):
{{ node_token.content | b64decode | trim }}
K3S_SERVER_URL:
https://{{ ansible_host }}:6443
══════════════════════════════════════════════════════════════
when: cluster_init | default(false)
# 保存 kubeconfig (仅 cluster-init)
- name: Fetch kubeconfig
ansible.builtin.fetch:
src: /etc/rancher/k3s/k3s.yaml
dest: "{{ playbook_dir }}/../kubeconfig.yaml"
flat: yes
flat: true
when: cluster_init | default(false)
# Rewrite the API endpoint in the local kubeconfig copy: any
# `server: https://127.0.0.1:6443` entry is replaced by ha_server_url when
# that variable is non-empty, otherwise by https://<ansible_host>:6443.
# Runs on the control node without privilege escalation, and only for the
# host flagged with cluster_init.
- name: Update kubeconfig server address
  when: cluster_init | default(false)
  delegate_to: localhost
  become: false
  ansible.builtin.replace:
    path: "{{ playbook_dir }}/../kubeconfig.yaml"
    regexp: 'server: https://127\.0\.0\.1:6443'
    replace: "server: {{ ha_server_url if (ha_server_url | default('') | length > 0) else 'https://' + ansible_host + ':6443' }}"
@@ -9,8 +9,14 @@ server: "{{ k3s_server_url }}"
token: "{{ k3s_token }}"
{% endif %}
# TLS SAN: 包含 HA 地址 + 所有 master 节点地址
tls-san:
- "{{ k3s_tls_san }}"
{% if ha_server_url | default('') | length > 0 %}
- "{{ ha_server_url | regex_replace('^https?://([^:]+)(:[0-9]+)?$', '\\1') }}"
{% endif %}
{% for host in groups['masters'] %}
- "{{ hostvars[host].ansible_host }}"
{% endfor %}
# ETCD 快照配置
etcd-snapshot-retention: {{ etcd_snapshot_retention }}
+21
查看文件
@@ -0,0 +1,21 @@
---
# Handlers of the ssh role. Every handler also listens on a topic equal to
# its own name.

# Restart the SSH daemon.
- name: Restart sshd
  listen: Restart sshd
  ansible.builtin.systemd:
    state: restarted
    name: sshd

# Point the connection at ssh_new_port for this host.
- name: Update ansible port
  listen: Update ansible port
  ansible.builtin.set_fact:
    ansible_port: "{{ ssh_new_port }}"

# Probe the new port from the control node, without privilege escalation:
# wait 5 seconds before the first check and give up after 60 seconds.
- name: Wait for new SSH port
  listen: Wait for new SSH port
  delegate_to: localhost
  become: false
  ansible.builtin.wait_for:
    host: "{{ ansible_host }}"
    port: "{{ ssh_new_port }}"
    delay: 5
    timeout: 60
+10 -28
查看文件
@@ -5,7 +5,7 @@
ansible.builtin.file:
path: /root/.ssh
state: directory
mode: '0700'
mode: "0700"
- name: Add SSH public key
ansible.builtin.authorized_key:
@@ -17,35 +17,17 @@
ansible.builtin.copy:
src: /etc/ssh/sshd_config
dest: /etc/ssh/sshd_config.bak
remote_src: yes
force: no
remote_src: true
force: false
mode: "0600"
- name: Deploy secure sshd_config
ansible.builtin.template:
src: sshd_config.j2
dest: /etc/ssh/sshd_config
mode: '0600'
validate: '/usr/sbin/sshd -t -f %s'
register: sshd_config
- name: Restart sshd service
ansible.builtin.systemd:
name: sshd
state: restarted
when: sshd_config.changed
- name: Update ansible_port to new SSH port
ansible.builtin.set_fact:
ansible_port: "{{ ssh_new_port }}"
when: sshd_config.changed
- name: Wait for SSH on new port
ansible.builtin.wait_for:
port: "{{ ssh_new_port }}"
host: "{{ ansible_host }}"
delay: 5
timeout: 60
delegate_to: localhost
become: no
when: sshd_config.changed
mode: "0600"
validate: "/usr/sbin/sshd -t -f %s"
notify:
- Restart sshd
- Update ansible port
- Wait for new SSH port