linux的tunnel技术实现

5290589 · 发表于 2018-5-23 10:59:15

（一）tunnel即隧道，被用于在公网内传输私网数据，也就是VPN。实现类似于我们学习的数据结构中的栈，把数据报文封装在新的报文中，通过第三方协议(比如IP协议)传输到对端，对端进行解封，重新路由。

linux内核支持IPIP/GRE隧道协议（不考虑IPV6）。tunnel4.c是一个框架程序，相当于容器，ipip是它肚子里的实体。觉得没有必要这么写，因为ip_gre.c的实现就不是这样的。

IPIP是最简单的实现隧道功能的协议，只支持承载IP报文，所以在应用上也就有了局限性，比如无法实现ARP代理，但对于分析设计思路还是非常好的，简单的东西往往更具有代表性，复杂的东西简单化么。

（二）在ipip中，首先要理解的是初始化过程

/*
 * ipip_init - module initialisation for the IPIP tunnel driver.
 *
 * Prints the banner, registers the IPIP receive handler with the tunnel
 * framework, then allocates and registers the "tunl0" network device.
 *
 * Returns 0 on success, -EAGAIN if the handler cannot be registered,
 * -ENOMEM if the device cannot be allocated, or the error reported by
 * register_netdev(). Everything acquired before a failure is released
 * again before returning.
 */
static int __init ipip_init(void)
{
	int rc;

	printk(banner);

	/* Add the receive handler to the tunnel framework. */
	if (xfrm4_tunnel_register(&ipip_handler, AF_INET) != 0) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	/*
	 * Create the virtual interface "tunl0". Per the original author's
	 * note: once a tunnel is configured, packets destined for a private
	 * address are routed to this virtual interface, encapsulated by
	 * ipip_tunnel_xmit, routed again, and sent out of the real physical
	 * interface towards the peer gateway (the other end of the tunnel).
	 * NOTE(review): the transmit hook is presumably installed by
	 * ipip_tunnel_setup, which is not visible here - confirm.
	 */
	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "tunl0",
					  ipip_tunnel_setup);
	if (ipip_fb_tunnel_dev == NULL) {
		/* Undo the handler registration done above. */
		xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
		return -ENOMEM;
	}

	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	rc = register_netdev(ipip_fb_tunnel_dev);
	if (rc != 0) {
		/* Unwind in reverse order of acquisition. */
		free_netdev(ipip_fb_tunnel_dev);
		xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
	}

	return rc;
}

（三）调用顺序上，

报文从以太接口接收，交给二层处理netif_receive_skb；

二层发现是IP报文，交给三层处理ip_rcv；

三层根据IP协议中的协议号，发现是IPPROTO_IPIP报文，交给tunnel4_rcv->ipip_rcv，这才是核心接收处理流程。

/*
 * ipip_rcv - receive handler for an IPIP-encapsulated packet.
 * @skb: the received packet; skb->nh.iph is read as the outer IP header.
 *
 * Looks up the tunnel matching the outer source/destination addresses.
 * If one exists and the xfrm input policy check passes, the skb is
 * re-pointed at the encapsulated packet, accounted against the tunnel,
 * and handed to netif_rx().
 *
 * Returns 0 when the packet was consumed (delivered, or freed after a
 * failed policy check), and -1 when no tunnel matches; in the -1 case
 * the skb is not freed here.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *outer_hdr = skb->nh.iph;
	struct ip_tunnel *t;

	read_lock(&ipip_lock);

	/* Several tunnel interfaces may exist; find the one for this packet. */
	t = ipip_tunnel_lookup(outer_hdr->saddr, outer_hdr->daddr);
	if (t == NULL) {
		read_unlock(&ipip_lock);
		return -1;
	}

	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		read_unlock(&ipip_lock);
		kfree_skb(skb);
		return 0;
	}

	secpath_reset(skb);

	/* The MAC header pointer takes over the old network header position. */
	skb->mac.raw = skb->nh.raw;
	/*
	 * The network header pointer moves to skb->data.
	 * NOTE(review): presumably skb->data already points at the inner IP
	 * packet because the caller pulled the outer header - confirm.
	 */
	skb->nh.raw = skb->data;
	skb->protocol = htons(ETH_P_IP);
	/* Mark the packet as addressed to this host. */
	skb->pkt_type = PACKET_HOST;

	/* Per-tunnel receive statistics. */
	t->stat.rx_packets++;
	t->stat.rx_bytes += skb->len;

	/* From here on the packet is attributed to the tunnel's device. */
	skb->dev = t->dev;

	/* Drop the route cached on the skb. */
	dst_release(skb->dst);
	skb->dst = NULL;
	nf_reset(skb);

	ipip_ecn_decapsulate(outer_hdr, skb);

	/* Hand the decapsulated packet back to the receive path. */
	netif_rx(skb);

	read_unlock(&ipip_lock);
	return 0;
}　　如果能读懂上面的程序，也就理解了tunnel技术。而后面的衍生技术GRE/SIT都只不过是其v2.0/v3.0。在往后就可以理解多种VPN实现，L2TP,PPTP,MPLS VPN等技术。
　　下面的函数是发送给tunnel接口报文的处理，就是recv函数的逆实现。
/*
 * ipip_tunnel_xmit - transmit handler for packets sent to an IPIP tunnel device.
 * @skb: packet to send; must carry ETH_P_IP, otherwise it is dropped.
 * @dev: the tunnel device; its private area is a struct ip_tunnel.
 *
 * Routes towards the tunnel's remote endpoint, validates the MTU, makes
 * sure there is enough headroom, prepends a new outer IPv4 header with
 * protocol IPPROTO_IPIP and then invokes IPTUNNEL_XMIT().
 *
 * Returns 0 on every path. Failures are only reflected in the tunnel
 * statistics; on the error paths visible here the skb is freed.
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct net_device_stats *stats = &tunnel->stat;
struct iphdr *tiph = &tunnel->parms.iph; /* Header template configured on the tunnel */
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
struct rtable *rt;          /* Route to the other host */
struct net_device *tdev;          /* Device to other host */
struct iphdr *old_iph = skb->nh.iph; /* Header of the packet being encapsulated */
struct iphdr *iph;          /* Our new IP header */
int max_headroom;          /* The extra header space needed */
__be32 dst = tiph->daddr;
int mtu;

/*
 * Re-entry guard: if the counter was already non-zero, count a collision
 * and drop. Every exit path below decrements the counter again.
 */
if (tunnel->recursion++) {
      tunnel->stat.collisions++;
      goto tx_error;
}

/* Only IPv4 payloads are encapsulated. */
if (skb->protocol != htons(ETH_P_IP))
      goto tx_error;

/*
 * Low bit of the configured TOS set: use the inner packet's TOS instead.
 * NOTE(review): presumably this bit is the "inherit TOS" flag - confirm.
 */
if (tos&1)
      tos = old_iph->tos;

/* No remote address configured: fall back to the gateway of the skb's own route. */
if (!dst) {
      /* NBMA tunnel */
      if ((rt = (struct rtable*)skb->dst) == NULL) {
         tunnel->stat.tx_fifo_errors++;
         goto tx_error;
      }
      if ((dst = rt->rt_gateway) == 0)
         goto tx_error_icmp;
}

/* Look up the route for the outer packet; on success rt holds a reference. */
{
      struct flowi fl = { .oif = tunnel->parms.link,
               .nl_u = { .ip4_u =
                  { .daddr = dst,
                     .saddr = tiph->saddr,
                     .tos = RT_TOS(tos) } },
               .proto = IPPROTO_IPIP };
      if (ip_route_output_key(&rt, &fl)) {
         tunnel->stat.tx_carrier_errors++;
         goto tx_error_icmp;
      }
}
tdev = rt->u.dst.dev;

/* The route leads back to this tunnel device itself: drop instead of looping. */
if (tdev == dev) {
      ip_rt_put(rt);
      tunnel->stat.collisions++;
      goto tx_error;
}

/*
 * If the tunnel template has frag_off set, the usable MTU is the route
 * MTU minus the outer header; otherwise take the MTU of the skb's
 * existing route, or the tunnel device MTU when the skb has no route.
 */
if (tiph->frag_off)
      mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
else
      mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

/* NOTE(review): 68 is presumably the minimum IPv4 MTU (RFC 791) - confirm. */
if (mtu < 68) {
      tunnel->stat.collisions++;
      ip_rt_put(rt);
      goto tx_error;
}
/* Propagate the computed MTU to the skb's current route, if it has one. */
if (skb->dst)
      skb->dst->ops->update_pmtu(skb->dst, mtu);

/* The outer header inherits the inner packet's Don't Fragment bit. */
df |= (old_iph->frag_off&htons(IP_DF));

/* Inner packet has DF set but does not fit: send ICMP "fragmentation needed" and drop. */
if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
      icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
      ip_rt_put(rt);
      goto tx_error;
}

/*
 * Pending tunnel errors: while the last error is more recent than
 * IPTUNNEL_ERR_TIMEO, consume one and report a link failure for this
 * skb; once the window has passed, clear the counter.
 */
if (tunnel->err_count > 0) {
      if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
         tunnel->err_count--;
         dst_link_failure(skb);
      } else
         tunnel->err_count = 0;
}

/*
   * Okay, now see if we can stuff it in the buffer as-is.
   */
max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

/* Too little headroom, or the skb is cloned/shared: work on a reallocated copy. */
if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
      struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
      if (!new_skb) {
         /* Allocation failed: release the route, count the drop, free the skb. */
         ip_rt_put(rt);
         stats->tx_dropped++;
         dev_kfree_skb(skb);
         tunnel->recursion--;
         return 0;
      }
      /* Keep the owning socket attached to the replacement skb. */
      if (skb->sk)
         skb_set_owner_w(new_skb, skb->sk);
      dev_kfree_skb(skb);
      skb = new_skb;
      /* The inner header now lives in the new buffer; refresh the pointer. */
      old_iph = skb->nh.iph;
}

/* The inner IP header becomes the payload; make room in front for the outer header. */
skb->h.raw = skb->nh.raw;
skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
/* Clear the IP options and the listed flags in the skb control block. */
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
         IPSKB_REROUTED);
/* Replace the skb's route with the one looked up for the outer packet. */
dst_release(skb->dst);
skb->dst = &rt->u.dst;

/*
   * Push down and install the IPIP header.
   */

iph          = skb->nh.iph;
iph->version       = 4;
iph->ihl       = sizeof(struct iphdr)>>2;
iph->frag_off       = df;
iph->protocol       = IPPROTO_IPIP;
iph->tos       = INET_ECN_encapsulate(tos, old_iph->tos);
iph->daddr       = rt->rt_dst;
iph->saddr       = rt->rt_src;

/* Use the tunnel's configured TTL; a configured value of 0 means copy the inner TTL. */
if ((iph->ttl = tiph->ttl) == 0)
      iph->ttl = old_iph->ttl;

nf_reset(skb);

/*
 * NOTE(review): IPTUNNEL_XMIT is a macro defined outside this view. It
 * presumably completes the header, sends the packet and updates stats,
 * and may reference locals such as skb, iph and stats by name - confirm
 * before renaming anything in this function.
 */
IPTUNNEL_XMIT();
tunnel->recursion--;
return 0;

/* Error exit that also reports a link failure for the skb. */
tx_error_icmp:
dst_link_failure(skb);
/* Common error exit: count the error, free the skb, release the re-entry guard. */
tx_error:
stats->tx_errors++;
dev_kfree_skb(skb);
tunnel->recursion--;
return 0;
}

账号		自动登录	找回密码
密码			立即注册

wirelessnetview好用的无线分析工具

Red Hat RHCE 8 (EX294) Cert Guide

Shell从入门到精通（阿良）

亿图图示专家(EDraw Max) V7.9 中文破解版

zabbix3.4.1安装部署+微信推送信息+大屏显

Red Hat OpenShift I: Containers & Kubern

2025 年，C++ 还能“硬核”多久？

[经验分享] linux的tunnel技术实现

浏览过的版块

扫码加入运维网微信交流群