#打卡不停更# calico Vxlan 跨节点通信 原创
安装 calico vxlan
部署模板地址:https://projectcalico.docs.tigera.io/manifests/calico-vxlan.yaml
修改如下参数:
将 CrossSubnet 改为 Always(合法取值为 Always / CrossSubnet / Never,Always 表示所有跨节点流量都使用 VXLAN 封装)
- name: CALICO_IPV4POOL_VXLAN
value: "CrossSubnet"
- name: CALICO_IPV4POOL_VXLAN
value: "Always"
# 修改 CIDR,保持和集群初始化时指定的 Pod 网段(例如 kubeadm 的 --pod-network-cidr)一致
- name: CALICO_IPV4POOL_CIDR
value: "10.244.0.0/16"
需要确认 backend 是否是 vxlan
calico_backend: "vxlan"
部署
kubectl apply -f calico-vxlan.yaml
我们可以确认 calico vxlan 模式并未通过 BGP 来维护路由,这一点和 IPIP 模式有着本质的区别。
[root@master ~]# calicoctl node status
Calico process is running.
The BGP backend process (BIRD) is not running.
当前环境
[root@master ~]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
pod1 1/1 Running 0 69s 10.244.103.68 node2.whale.com <none> <none>
pod3 1/1 Running 0 48s 10.244.42.71 node1.whale.com <none> <none>
pod1 10.244.103.68 node2 192.168.0.82
pod3 10.244.42.71 node1 192.168.0.81
pod1 及其对应节点的cali网卡和路由表
# pod1 及其对应节点的cali网卡和路由表
[root@master ~]# kubectl exec -it pod1 -- ifconfig eth0
eth0 Link encap:Ethernet HWaddr 26:7A:E7:8B:C4:48
inet addr:10.244.103.68 Bcast:0.0.0.0 Mask:255.255.255.255
UP BROADCAST RUNNING MULTICAST MTU:1450 Metric:1
RX packets:11 errors:0 dropped:0 overruns:0 frame:0
TX packets:6 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:810 (810.0 B) TX bytes:364 (364.0 B)
[root@master ~]# kubectl exec -it pod1 -- ethtool -S eth0
NIC statistics:
peer_ifindex: 7
rx_queue_0_xdp_packets: 0
rx_queue_0_xdp_bytes: 0
rx_queue_0_xdp_drops: 0
[root@master ~]# kubectl exec -it pod1 -- route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
169.254.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
[root@node2 ~]# ip link show | grep ^7
7: calice0906292e2@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP mode DEFAULT group default
[root@node2 ~]# ip link show vxlan.calico
6: vxlan.calico: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UNKNOWN mode DEFAULT group default
link/ether 66:78:26:30:e8:cf brd ff:ff:ff:ff:ff:ff
[root@node2 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
10.244.42.64 10.244.42.64 255.255.255.192 UG 0 0 0 vxlan.calico
[root@node2 ~]# ifconfig vxlan.calico
vxlan.calico: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1450
inet 10.244.103.64 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::6478:26ff:fe30:e8cf prefixlen 64 scopeid 0x20<link>
ether 66:78:26:30:e8:cf txqueuelen 0 (Ethernet)
RX packets 1 bytes 84 (84.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1 bytes 84 (84.0 B)
TX errors 0 dropped 11 overruns 0 carrier 0 collisions 0
pod3 及其对应节点的 cali 网卡和路由表
# pod3 及其对应节点的cali网卡和路由表
[root@master ~]# kubectl exec -it pod3 -- ifconfig eth0
eth0 Link encap:Ethernet HWaddr AE:DE:E7:84:F7:C2
inet addr:10.244.42.71 Bcast:0.0.0.0 Mask:255.255.255.255
UP BROADCAST RUNNING MULTICAST MTU:1450 Metric:1
RX packets:11 errors:0 dropped:0 overruns:0 frame:0
TX packets:6 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:810 (810.0 B) TX bytes:364 (364.0 B)
[root@master ~]# kubectl exec -it pod3 -- ethtool -S eth0
NIC statistics:
peer_ifindex: 10
rx_queue_0_xdp_packets: 0
rx_queue_0_xdp_bytes: 0
rx_queue_0_xdp_drops: 0
[root@master ~]# kubectl exec -it pod3 -- route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
169.254.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
[root@node1 ~]# ip link show | grep ^10
10: cali49778cadcf1@if3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UP mode DEFAULT group default
[root@node1 ~]# ip link show vxlan.calico
7: vxlan.calico: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1450 qdisc noqueue state UNKNOWN mode DEFAULT group default
link/ether 66:52:2e:2a:6b:f4 brd ff:ff:ff:ff:ff:ff
[root@node1 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 192.168.0.1 0.0.0.0 UG 100 0 0 ens33
10.244.42.64 0.0.0.0 255.255.255.192 U 0 0 0 *
10.244.42.68 0.0.0.0 255.255.255.255 UH 0 0 0 calicfa85ffd8bd
10.244.42.69 0.0.0.0 255.255.255.255 UH 0 0 0 cali44307f7c2ca
10.244.42.70 0.0.0.0 255.255.255.255 UH 0 0 0 cali27794099b3f
10.244.42.71 0.0.0.0 255.255.255.255 UH 0 0 0 cali49778cadcf1
10.244.103.64 10.244.103.64 255.255.255.192 UG 0 0 0 vxlan.calico
10.244.152.128 10.244.152.128 255.255.255.192 UG 0 0 0 vxlan.calico
172.17.0.0 0.0.0.0 255.255.0.0 U 0 0 0 docker0
192.168.0.0 0.0.0.0 255.255.255.0 U 100 0 0 ens33
[root@node1 ~]# ifconfig vxlan.calico
vxlan.calico: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1450
inet 10.244.42.64 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::6452:2eff:fe2a:6bf4 prefixlen 64 scopeid 0x20<link>
ether 66:52:2e:2a:6b:f4 txqueuelen 0 (Ethernet)
RX packets 1 bytes 84 (84.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1 bytes 84 (84.0 B)
TX errors 0 dropped 11 overruns 0 carrier 0 collisions 0
数据流向图
pod1 ping pod3
kubectl exec -it pod1 -- ping -c 1 10.244.42.71
pod1.cap
tcpdump -pne -i calice0906292e2 -w pod1.cap
具体细节
[root@master ~]# kubectl exec -it pod1 -- route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 169.254.1.1 0.0.0.0 UG 0 0 0 eth0
169.254.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 eth0
pod1-vxlan.cap
tcpdump -pne -i vxlan.calico -w pod1-vxlan.cap
具体细节
通过主机路由查找网关 和 对应的网卡出口
然后通过发送 ARP 请求,找到对端网关的 MAC 地址,也就是 pod3 所在节点的 vxlan.calico 网卡的 MAC 地址
[root@node2 ~]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
10.244.42.64 10.244.42.64 255.255.255.192 UG 0 0 0 vxlan.calico
[root@node2 ~]# ifconfig vxlan.calico
vxlan.calico: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1450
inet 10.244.103.64 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::6478:26ff:fe30:e8cf prefixlen 64 scopeid 0x20<link>
ether 66:78:26:30:e8:cf txqueuelen 0 (Ethernet)
RX packets 1 bytes 84 (84.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1 bytes 84 (84.0 B)
TX errors 0 dropped 11 overruns 0 carrier 0 collisions 0
[root@node1 ~]# ifconfig vxlan.calico
vxlan.calico: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1450
inet 10.244.42.64 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::6452:2eff:fe2a:6bf4 prefixlen 64 scopeid 0x20<link>
ether 66:52:2e:2a:6b:f4 txqueuelen 0 (Ethernet)
RX packets 1 bytes 84 (84.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1 bytes 84 (84.0 B)
TX errors 0 dropped 11 overruns 0 carrier 0 collisions 0
pod1-node.cap
tcpdump -pne -i ens33 -w pod1-node.cap
具体细节:从 ip -d link show 可以看到本端的 local ip,然后通过 fdb 表可以看到对端 vxlan.calico 网卡所在的节点,即为 remote ip。
local ip、remote ip、vni id、dstport 这些要素就可以指导封装 VXLAN 的数据包了。
[root@node1 ~]# ip -d link show vxlan.calico
7: vxlan.calico: <BROADCAST,MULTICAST> mtu 1450 qdisc noqueue state DOWN mode DEFAULT group default
link/ether 66:52:2e:2a:6b:f4 brd ff:ff:ff:ff:ff:ff promiscuity 0
vxlan id 4096 local 192.168.0.81 dev ens33 srcport 0 0 dstport 4789 nolearning ageing 300 udpcsum noudp6zerocsumtx noudp6zerocsumrx addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
[root@node1 ~]# bridge fdb show
66:78:26:30:e8:cf dev vxlan.calico dst 192.168.0.82 self permanent
66:b1:18:59:9a:ed dev vxlan.calico dst 192.168.0.80 self permanent
pod3.cap
tcpdump -pne -i cali49778cadcf1 -w pod3.cap
细节和上边保持一致即可
pod3-vxlan.cap
tcpdump -pne -i vxlan.calico -w pod3-vxlan.cap
![[…/image/Pasted image 20220517220839.png]]
pod3-node.cap
tcpdump -pne -i ens33 -w pod3-node.cap
[root@node2 ~]# ip -d link show vxlan.calico
6: vxlan.calico: <BROADCAST,MULTICAST> mtu 1450 qdisc noqueue state DOWN mode DEFAULT group default
link/ether 66:78:26:30:e8:cf brd ff:ff:ff:ff:ff:ff promiscuity 0
vxlan id 4096 local 192.168.0.82 dev ens33 srcport 0 0 dstport 4789 nolearning ageing 300 udpcsum noudp6zerocsumtx noudp6zerocsumrx addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
[root@node2 ~]# bridge fdb show
66:52:2e:2a:6b:f4 dev vxlan.calico dst 192.168.0.81 self permanent
66:b1:18:59:9a:ed dev vxlan.calico dst 192.168.0.80 self permanent
结论
通过报文的抓取,我们验证了 calico vxlan 跨节点通信的过程
![[…/image/Pasted image 20220517213725.png]]
我们在 pod1 所在节点上发现了一条很关键的路由
pod1 10.244.103.68 node2 192.168.0.82
pod3 10.244.42.71 node1 192.168.0.81
去往 10.244.42.64/26 这个网段的流量,都需要经过 vxlan.calico 网卡,
因此我们要去往的 pod3(10.244.42.71)
就需要经过 VXLAN 的封装。
Destination Gateway Genmask Flags Metric Ref Use Iface
10.244.42.64 10.244.42.64 255.255.255.192 UG 0 0 0 vxlan.calico
到达对端 pod3 所在节点解封装,然后查询本地路由到目的 pod。
Destination Gateway Genmask Flags Metric Ref Use Iface
10.244.42.71 0.0.0.0 255.255.255.255 UH 0 0 0 cali49778cadcf1