|
上一节只是说了关于网桥的接收处理,这里分析下网桥模块的初始化工作. 对于桥的用户空间配置工具一般来说有brctl和ebtables.
参考内核 2.6.32.61 kernel/net/bridge/*
我们来看一下br.c中br_init函数
/*
 * Module init routine of the bridge code (br.c).
 *
 * Registers, in order: the STP protocol handler, the fdb cache, the per-net
 * subsystem, bridge netfilter, the netdevice notifier and netlink, then
 * installs the ioctl stub and the frame-handling hooks.
 *
 * Returns 0 on success. On failure the err_out* labels undo the steps that
 * already succeeded, in reverse order, and the failing step's error code is
 * returned.
 *
 * NOTE: the definitions of br_stp_proto and br_netfilter_init() are pasted
 * directly below their first use purely as reading aids; they are not part
 * of br_init()'s own body.
 */
static int __init br_init(void)
{
int err;
err = stp_proto_register(&br_stp_proto); // register the STP protocol: it is stored in garp_protos, which stp_pdu_rcv() in net/802/stp.c uses (author's note)
/* (inline reference) the protocol descriptor registered above */
static const struct stp_proto br_stp_proto = {
.rcv = br_stp_rcv,
};
if (err < 0) {
printk(KERN_ERR "bridge: can't register sap for STP\n");
return err;
}
err = br_fdb_init(); // create the fdb cache
if (err)
goto err_out;
err = register_pernet_subsys(&br_net_ops); // register the bridge per-net subsystem (network namespace support)
if (err)
goto err_out1;
err = br_netfilter_init(); // initialize the bridge part of netfilter; used when configuring with ebtables
/* (inline reference) body of br_netfilter_init(), called above */
int __init br_netfilter_init(void)
{
int ret;
ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
if (ret < 0)
return ret;
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
/* sysctl registration failed: drop the hooks registered above */
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
return -ENOMEM;
}
#endif
printk(KERN_NOTICE "Bridge firewalling registered\n");
return 0;
}
if (err)
goto err_out2;
err = register_netdevice_notifier(&br_device_notifier); // register the notifier chain the bridge device cares about
if (err)
goto err_out3;
err = br_netlink_init(); // netlink
if (err)
goto err_out4;
brioctl_set(br_ioctl_deviceless_stub); // socket-level ioctl stub, not the ioctl of the bridge device itself
br_handle_frame_hook = br_handle_frame;
#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
return 0;
/* Error unwinding: each label undoes the step registered just before the one that failed */
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
br_netfilter_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
br_fdb_fini();
err_out:
stp_proto_unregister(&br_stp_proto);
return err;
}
这里没有分析什么,初始化的部分内容不是很多,但是都很重要.
下面分析一下桥的创建以及添加接口. 在br_if.c中
/*
 * Create and register a new bridge device called @name in namespace @net.
 *
 * Returns 0 on success, -ENOMEM if the device could not be allocated, or
 * the error reported by dev_alloc_name(), register_netdevice() or
 * br_sysfs_addbr().
 */
int br_add_bridge(struct net *net, const char *name)
{
struct net_device *dev;
int ret;
dev = new_bridge_dev(net, name); // the core of this function: allocate and initialize the net_device
if (!dev)
return -ENOMEM;
/* everything from here to the "out" label runs under the RTNL lock */
rtnl_lock();
/* the name contains a '%', so let dev_alloc_name() produce the final name */
if (strchr(dev->name, '%')) {
ret = dev_alloc_name(dev, dev->name); // allocate the name
if (ret < 0)
goto out_free;
}
SET_NETDEV_DEVTYPE(dev, &br_type);
ret = register_netdevice(dev); // register the device
if (ret)
goto out_free;
ret = br_sysfs_addbr(dev); // sysfs-related setup
if (ret)
unregister_netdevice(dev);
out:
rtnl_unlock();
return ret;
/* failure before the device was registered: free it, then leave through "out" to drop the lock */
out_free:
free_netdev(dev);
goto out;
}
我们看new_bridge_dev
/*
 * Allocate a net_device whose private area holds a struct net_bridge, and
 * fill in the bridge's default state (locks, lists, bridge id, STP timing
 * values, fake route table, timers).
 *
 * Returns the new device, or NULL if alloc_netdev() fails.
 *
 * NOTE: the bodies of br_dev_setup(), br_netfilter_rtable_init() and
 * br_stp_timer_init() are pasted inline next to their use purely as reading
 * aids; they are not part of this function's own body.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
struct net_bridge *br;
struct net_device *dev;
dev = alloc_netdev(sizeof(struct net_bridge), name, // allocate the device with br_dev_setup as the setup callback (same pattern as an ordinary NIC driver)
/* (inline reference) body of the br_dev_setup callback passed as the last argument */
void br_dev_setup(struct net_device *dev)
{
random_ether_addr(dev->dev_addr);
ether_setup(dev);
dev->netdev_ops = &br_netdev_ops; // install the bridge's netdev operations
dev->destructor = free_netdev;
SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
dev->tx_queue_len = 0;
dev->priv_flags = IFF_EBRIDGE;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
}
br_dev_setup);
if (!dev)
return NULL;
dev_net_set(dev, net); // network namespace
br = netdev_priv(dev); // br points into the net_device's private area
br->dev = dev;
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
spin_lock_init(&br->hash_lock);
br->bridge_id.prio[0] = 0x80; // bridge priority defaults to 0x8000
br->bridge_id.prio[1] = 0x00;
memcpy(br->group_addr, br_group_address, ETH_ALEN);
br->feature_mask = dev->features; // initialize the bridge's own fields
br->stp_enabled = BR_NO_STP; // STP is off by default
br->designated_root = br->bridge_id;
br->root_path_cost = 0;
br->root_port = 0; // root port defaults to 0
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
br->topology_change = 0;
br->topology_change_detected = 0;
br->ageing_time = 300 * HZ;
br_netfilter_rtable_init(br); // set up the bridge's fake route info (PMTU); see that function's own comment below
/*
* Initialize bogus route table used to keep netfilter happy.
* Currently, we fill in the PMTU entry because netfilter
* refragmentation needs it, and the rt_flags entry because
* ipt_REJECT needs it. Future netfilter modules might
* require us to fill additional fields.
*/
void br_netfilter_rtable_init(struct net_bridge *br)
{
struct rtable *rt = &br->fake_rtable;
atomic_set(&rt->u.dst.__refcnt, 1);
rt->u.dst.dev = br->dev;
rt->u.dst.path = &rt->u.dst;
rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
rt->u.dst.flags = DST_NOXFRM;
rt->u.dst.ops = &fake_dst_ops;
}
INIT_LIST_HEAD(&br->age_list); // initialize the ageing list
br_stp_timer_init(br); // four STP-related timers: hello_timer, tcn_timer, topology_change_timer, gc_timer
/* (inline reference) body of br_stp_timer_init(), called above */
void br_stp_timer_init(struct net_bridge *br)
{
setup_timer(&br->hello_timer, br_hello_timer_expired,
(unsigned long) br);
setup_timer(&br->tcn_timer, br_tcn_timer_expired,
(unsigned long) br);
setup_timer(&br->topology_change_timer,
br_topology_change_timer_expired,
(unsigned long) br);
setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
}
return dev;
}
这里简单说下定时器的作用:它主要跟stp相关
hello timer:用于定期产生配置bpdu。只有根网桥可以使用该定时器
tcn timer:由检测到拓扑变化而且必须通知根网桥的网桥使用.
Topology change timer: (拓扑变化 TC)
由根网桥使用,以便记住要在其配置bpdu中设定一个特殊标志。此标志用于将拓扑变化通知其他(非根)网桥
Aging 定时器 (gc timer)
用于从转发数据库中清除无效的地址。该定时器由网桥使用,而不论其是否启用stp
我们继续看看往桥里添加一个接口:
关于这个函数的调用,可以参考brctl 命令的使用 例如:brctl addif br0 eth0
这里之所以要提这一点,主要是因为刚才注册的br本身也是一个netdevice设备,那么struct net_bridge在哪里呢?它就存放在netdevice的私有数据区中,通过netdev_priv(dev)取得。看下ioctl的具体操作就明白了。
/*
 * ioctl handler of the bridge net_device.
 *
 * SIOCDEVPRIVATE is passed on to old_dev_ioctl(); SIOCBRADDIF and
 * SIOCBRDELIF add or remove the interface whose index is rq->ifr_ifindex
 * through add_del_if(). Any other command is logged and rejected with
 * -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	/* the bridge state lives in the net_device's private area */
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF) {
		/* non-zero selects "add", zero selects "delete" */
		int is_add = (cmd == SIOCBRADDIF);

		return add_del_if(br, rq->ifr_ifindex, is_add);
	}

	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}
添加桥端口函数:br_add_if
/*
 * Attach net_device @dev to bridge @br as a new port.
 *
 * Returns 0 on success, or a negative errno:
 *   -EINVAL  @dev is a loopback or otherwise non-Ethernet-like device
 *   -ELOOP   @dev is itself a bridge
 *   -EBUSY   @dev is already a port of some bridge
 * or the error reported by new_nbp() or one of the later setup steps. When
 * a later step fails, the steps already completed are undone through the
 * err2 .. put_back labels.
 *
 * called with RTNL
 */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	int err = 0;

	/* Don't allow bridging non-ethernet like devices */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
		return -EINVAL;

	/* No bridging of bridges: a bridge device cannot be added to a bridge */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
		return -ELOOP;

	/* Device is already being bridged: a port cannot join a second bridge */
	if (dev->br_port != NULL)
		return -EBUSY;

	/*
	 * Create the port object. Author's note: the port starts out in the
	 * blocking state as a designated port, and its port timers are
	 * initialized here.
	 */
	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	err = dev_set_promiscuity(dev, 1);
	if (err)
		goto put_back;

	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
	if (err)
		goto err0;

	/* record the device's own MAC in the fdb (mac -> port) */
	err = br_fdb_insert(br, p, dev->dev_addr);
	if (err)
		goto err1;

	err = br_sysfs_addif(p);
	if (err)
		goto err2;

	rcu_assign_pointer(dev->br_port, p);
	dev_disable_lro(dev);

	/* link the port into the bridge's port_list */
	list_add_rcu(&p->list, &br->port_list);

	spin_lock_bh(&br->lock);
	br_stp_recalculate_bridge_id(br);
	br_features_recompute(br);

	/* only enable the port when device, carrier and bridge are all up */
	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p); /* author's note: port heads toward forwarding, depending on the situation */
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_NEWLINK, p);

	/* set the bridge MTU from br_min_mtu() */
	dev_set_mtu(br->dev, br_min_mtu(br));

	kobject_uevent(&p->kobj, KOBJ_ADD);

	return 0;

err2:
	br_fdb_delete_by_port(br, p, 1);
err1:
	kobject_put(&p->kobj);
	p = NULL; /* kobject_put frees */
err0:
	dev_set_promiscuity(dev, -1);
put_back:
	dev_put(dev);
	/* p is NULL here on the err1/err2 paths, so kfree() is then a no-op */
	kfree(p);
	return err;
}
图为桥和端口和fdb entry以及netdevice间的关系.
这里说一下port timers:
/*
 * Set up the three per-port STP timers (message age, forward delay, hold).
 * Each timer gets its own expiry handler and receives the port pointer,
 * cast to unsigned long, as its argument.
 */
void br_stp_port_timer_init(struct net_bridge_port *p)
{
	/* the same cookie is handed to all three handlers */
	unsigned long port_arg = (unsigned long) p;

	setup_timer(&p->message_age_timer, br_message_age_timer_expired,
		    port_arg);

	setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
		    port_arg);

	setup_timer(&p->hold_timer, br_hold_timer_expired,
		    port_arg);
}
message_age_timer: 由于bpdu携带的信息生存期是有限的,它用于强制执行这个生存期 限于收到bpdu数据
消息生存期 > 最大值 :丢弃
消息生存期 < 最大值 : 启动Message Age定时器.(时间为差值)
forward_delay_timer:负责状态转移,机制如图
hold_timer:限制端口bpdu的发送速率
我们这里看一下br_make_forwarding函数:
/*
 * Start moving a blocked port toward the forwarding state.
 *
 * Does nothing unless the port is currently BR_STATE_BLOCKING. With a zero
 * forward delay the port jumps straight to BR_STATE_FORWARDING; otherwise
 * it enters BR_STATE_LISTENING (kernel STP enabled) or BR_STATE_LEARNING,
 * and forward_delay_timer is armed to fire after br->forward_delay.
 */
/* called under bridge lock */
static void br_make_forwarding(struct net_bridge_port *p)
{
struct net_bridge *br = p->br;
if (p->state != BR_STATE_BLOCKING)
return;
if (br->forward_delay == 0) {
p->state = BR_STATE_FORWARDING;
br_topology_change_detection(br);
del_timer(&p->forward_delay_timer);
}
else if (p->br->stp_enabled == BR_KERNEL_STP) // kernel STP is not enabled by default
p->state = BR_STATE_LISTENING;
else
p->state = BR_STATE_LEARNING; // the usual case
br_log_state(p);
/* non-zero delay: the timer handler performs the next state transition */
if (br->forward_delay != 0)
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
}
这里br->forward_delay肯定不为0,见桥创建函数:
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
而stp默认是没有开启的,所以最后的状态是BR_STATE_LEARNING。也就是说,刚把端口加入桥的时候,在port timer(forward_delay_timer)到期之前,端口处于学习状态;定时器到期后会触发下面的处理函数:
/*
 * Expiry handler of a port's forward_delay_timer; @arg is the port pointer
 * that was registered with the timer.
 *
 * Under the bridge lock, advances the port by one state:
 *   LISTENING -> LEARNING   (and re-arms the timer for another forward_delay)
 *   LEARNING  -> FORWARDING (and runs topology change detection when
 *                            br_is_designated_for_some_port() is true)
 * Any other state is left untouched. The state is logged in every case.
 */
static void br_forward_delay_timer_expired(unsigned long arg)
{
	struct net_bridge_port *port = (struct net_bridge_port *) arg;
	struct net_bridge *bridge = port->br;

	pr_debug("%s: %d(%s) forward delay timer\n",
		 bridge->dev->name, port->port_no, port->dev->name);

	spin_lock(&bridge->lock);

	switch (port->state) {
	case BR_STATE_LISTENING:
		port->state = BR_STATE_LEARNING;
		mod_timer(&port->forward_delay_timer,
			  jiffies + bridge->forward_delay);
		break;
	case BR_STATE_LEARNING:
		port->state = BR_STATE_FORWARDING;
		if (br_is_designated_for_some_port(bridge))
			br_topology_change_detection(bridge);
		break;
	default:
		/* no transition for the remaining states */
		break;
	}

	br_log_state(port);
	spin_unlock(&bridge->lock);
}
它把端口状态设置为了转发态.
关于桥下环路问题 即生成树协议,这里不做讨论 . 有兴趣的可以参考《深入理解linux网络内幕》第十五章生成树协议,以及参考相关的rfc.
新版生成树协议:
RSTP 802.1w
MSTP 802.1s 针对多个vlan的
当然这里说的还是比较粗略,只是作为一个深入学习的引子.
|
|