设为首页 收藏本站
查看: 1347|回复: 0

[经验分享] Openvswitch原理与代码分析(2): ovs-vswitchd的启动

[复制链接]

尚未签到

发表于 2017-6-25 10:26:14 | 显示全部楼层 |阅读模式
  ovs-vswitchd.c的main函数最终会进入一个while无限循环,在这个循环中最重要的两个函数是bridge_run()和netdev_run()。
DSC0000.png

  Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。
  其中bridge_run就是初始化数据库中已经创建的虚拟网桥。
一、虚拟网桥的初始化bridge_run
  bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run



  • static void

  • bridge_run__(void)

  • {

  • ……

  •     /* Let each bridge do the work that it needs to do. */

  •     HMAP_FOR_EACH (br, node, &all_bridges) {

  •         ofproto_run(br->ofproto);

  •     }

  • }
  int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);
  ofproto_class的定义在ofproto-provider.h中,它的实现定义在ofproto-dpif.c中,这里面的所有的函数,在这个文件中都有定义。



  • const struct ofproto_class ofproto_dpif_class = {

  •     init,

  •     enumerate_types,

  •     enumerate_names,

  •     del,

  •     port_open_type,

  •     type_run,

  •     type_wait,

  •     alloc,

  •     construct,

  •     destruct,

  •     dealloc,

  •     run,

  •     wait,

  •     NULL, /* get_memory_usage. */

  •     type_get_memory_usage,

  •     flush,

  •     query_tables,

  •     set_tables_version,

  •     port_alloc,

  •     port_construct,

  •     port_destruct,

  •     port_dealloc,

  •     port_modified,

  •     port_reconfigured,

  •     port_query_by_name,

  •     port_add,

  •     port_del,

  •     port_get_stats,

  •     port_dump_start,

  •     port_dump_next,

  •     port_dump_done,

  •     port_poll,

  •     port_poll_wait,

  •     port_is_lacp_current,

  •     port_get_lacp_stats,

  •     NULL, /* rule_choose_table */

  •     rule_alloc,

  •     rule_construct,

  •     rule_insert,

  •     rule_delete,

  •     rule_destruct,

  •     rule_dealloc,

  •     rule_get_stats,

  •     rule_execute,

  •     set_frag_handling,

  •     packet_out,

  •     set_netflow,

  •     get_netflow_ids,

  •     set_sflow,

  •     set_ipfix,

  •     set_cfm,

  •     cfm_status_changed,

  •     get_cfm_status,

  •     set_lldp,

  •     get_lldp_status,

  •     set_aa,

  •     aa_mapping_set,

  •     aa_mapping_unset,

  •     aa_vlan_get_queued,

  •     aa_vlan_get_queue_size,

  •     set_bfd,

  •     bfd_status_changed,

  •     get_bfd_status,

  •     set_stp,

  •     get_stp_status,

  •     set_stp_port,

  •     get_stp_port_status,

  •     get_stp_port_stats,

  •     set_rstp,

  •     get_rstp_status,

  •     set_rstp_port,

  •     get_rstp_port_status,

  •     set_queues,

  •     bundle_set,

  •     bundle_remove,

  •     mirror_set__,

  •     mirror_get_stats__,

  •     set_flood_vlans,

  •     is_mirror_output_bundle,

  •     forward_bpdu_changed,

  •     set_mac_table_config,

  •     set_mcast_snooping,

  •     set_mcast_snooping_port,

  •     set_realdev,

  •     NULL, /* meter_get_features */

  •     NULL, /* meter_set */

  •     NULL, /* meter_get */

  •     NULL, /* meter_del */

  •     group_alloc, /* group_alloc */

  •     group_construct, /* group_construct */

  •     group_destruct, /* group_destruct */

  •     group_dealloc, /* group_dealloc */

  •     group_modify, /* group_modify */

  •     group_get_stats, /* group_get_stats */

  •     get_datapath_version, /* get_datapath_version */

  • };
  在ofproto-provider.h中注释里是这样说的。
  这里定义了四类数据结构
  struct ofproto表示一个交换机
  struct ofport表示交换机上的一个端口
  struct rule表示交换机上的一条flow规则
  struct ofgroup表示一个flow规则组
  上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。
  在这个函数中,会依次运行netflow, sflow, ipfix, stp, rstp, mac address learning等一系列模块的周期性处理。
  bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。
  在这个函数里面,对于每一个网桥,将网卡添加进去



  • HMAP_FOR_EACH (br, node, &all_bridges) {

  •     bridge_add_ports(br, &br->wanted_ports);

  •     shash_destroy(&br->wanted_ports);

  • }



  • static void

  • bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)

  • {

  •     /* First add interfaces that request a particular port number. */

  •     bridge_add_ports__(br, wanted_ports, true);

  •  

  •     /* Then add interfaces that want automatic port number assignment.

  •      * We add these afterward to avoid accidentally taking a specifically

  •      * requested port number. */

  •     bridge_add_ports__(br, wanted_ports, false);

  • }
  static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用
  static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用
  static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用
  int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用



  • error = ofproto->ofproto_class->port_add(ofproto, netdev);
  会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。
  会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用



  • error = dpif->dpif_class->port_add(dpif, netdev, &port_no);
  会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,也即
  static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev, odp_port_t *port_nop)会调用
  static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)
  在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW



  • const char *name = netdev_vport_get_dpif_port(netdev,

  •                                                   namebuf, sizeof namebuf);

  • struct dpif_netlink_vport request, reply;

  • struct nl_sock **socksp = NULL;

  •  

  • if (dpif->handlers) {

  •     socksp = vport_create_socksp(dpif, &error);

  •     if (!socksp) {

  •         return error;

  •     }

  • }

  •  

  • dpif_netlink_vport_init(&request);

  • request.cmd = OVS_VPORT_CMD_NEW;

  • request.dp_ifindex = dpif->dp_ifindex;

  • request.type = netdev_to_ovs_vport_type(netdev);

  •  

  • request.name = name;

  •  

  • upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);

  • request.n_upcall_pids = socksp ? dpif->n_handlers : 1;

  • request.upcall_pids = upcall_pids;

  • error = dpif_netlink_vport_transact(&request, &reply, &buf);
  这里会调用内核模块openvswitch.ko,在内核中添加虚拟网卡。这部分详细的过程将在下一节分析。
二、虚拟网卡的初始化netdev_run()



  • void

  • netdev_run(void)

  •     OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)

  • {

  •     struct netdev_registered_class *rc;

  •  

  •     netdev_initialize();

  •     ovs_mutex_lock(&netdev_class_mutex);

  •     HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {

  •         if (rc->class->run) {

  •             rc->class->run();

  •         }

  •     }

  •     ovs_mutex_unlock(&netdev_class_mutex);

  • }
  依次循环调用netdev_classes中的每一个run。
  对于不同类型的虚拟网卡,都有对应的netdev_class。
  例如对于dpdk的网卡有



  • static const struct netdev_class dpdk_class =

  •     NETDEV_DPDK_CLASS(

  •         "dpdk",

  •         NULL,

  •         netdev_dpdk_construct,

  •         netdev_dpdk_destruct,

  •         netdev_dpdk_set_multiq,

  •         netdev_dpdk_eth_send,

  •         netdev_dpdk_get_carrier,

  •         netdev_dpdk_get_stats,

  •         netdev_dpdk_get_features,

  •         netdev_dpdk_get_status,

  •         netdev_dpdk_rxq_recv);
  对于物理网卡,也需要有相应的netdev_class



  • const struct netdev_class netdev_linux_class =

  •     NETDEV_LINUX_CLASS(

  •         "system",

  •         netdev_linux_construct,

  •         netdev_linux_get_stats,

  •         netdev_linux_get_features,

  •         netdev_linux_get_status);
  对于连接到KVM的tap网卡



  • const struct netdev_class netdev_tap_class =

  •     NETDEV_LINUX_CLASS(

  •         "tap",

  •         netdev_linux_construct_tap,

  •         netdev_tap_get_stats,

  •         netdev_linux_get_features,

  •         netdev_linux_get_status);
  对于Open vSwitch的internal类型端口(内部虚拟网卡,例如与网桥同名的本地端口;veth pair属于上面的system类型)



  • const struct netdev_class netdev_internal_class =

  •     NETDEV_LINUX_CLASS(

  •         "internal",

  •         netdev_linux_construct,

  •         netdev_internal_get_stats,

  •         NULL, /* get_features */

  •         netdev_internal_get_status);
  其中NETDEV_LINUX_CLASS是一个宏,只需要填写各类型之间不同的几个参数(名称、construct、get_stats、get_features、get_status),其余的回调函数由宏统一填入。



  • #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, \

  •                            GET_FEATURES, GET_STATUS) \

  • { \

  •     NAME, \

  •                                                                 \

  •     NULL, \

  •     netdev_linux_run, \

  •     netdev_linux_wait, \

  •                                                                 \

  •     netdev_linux_alloc, \

  •     CONSTRUCT, \

  •     netdev_linux_destruct, \

  •     netdev_linux_dealloc, \

  •     NULL, /* get_config */ \

  •     NULL, /* set_config */ \

  •     NULL, /* get_tunnel_config */ \

  •     NULL, /* build header */ \

  •     NULL, /* push header */ \

  •     NULL, /* pop header */ \

  •     NULL, /* get_numa_id */ \

  •     NULL, /* set_multiq */ \

  •                                                                 \

  •     netdev_linux_send, \

  •     netdev_linux_send_wait, \

  •                                                                 \

  •     netdev_linux_set_etheraddr, \

  •     netdev_linux_get_etheraddr, \

  •     netdev_linux_get_mtu, \

  •     netdev_linux_set_mtu, \

  •     netdev_linux_get_ifindex, \

  •     netdev_linux_get_carrier, \

  •     netdev_linux_get_carrier_resets, \

  •     netdev_linux_set_miimon_interval, \

  •     GET_STATS, \

  •                                                                 \

  •     GET_FEATURES, \

  •     netdev_linux_set_advertisements, \

  •                                                                 \

  •     netdev_linux_set_policing, \

  •     netdev_linux_get_qos_types, \

  •     netdev_linux_get_qos_capabilities, \

  •     netdev_linux_get_qos, \

  •     netdev_linux_set_qos, \

  •     netdev_linux_get_queue, \

  •     netdev_linux_set_queue, \

  •     netdev_linux_delete_queue, \

  •     netdev_linux_get_queue_stats, \

  •     netdev_linux_queue_dump_start, \

  •     netdev_linux_queue_dump_next, \

  •     netdev_linux_queue_dump_done, \

  •     netdev_linux_dump_queue_stats, \

  •                                                                 \

  •     netdev_linux_get_in4, \

  •     netdev_linux_set_in4, \

  •     netdev_linux_get_in6, \

  •     netdev_linux_add_router, \

  •     netdev_linux_get_next_hop, \

  •     GET_STATUS, \

  •     netdev_linux_arp_lookup, \

  •                                                                 \

  •     netdev_linux_update_flags, \

  •                                                                 \

  •     netdev_linux_rxq_alloc, \

  •     netdev_linux_rxq_construct, \

  •     netdev_linux_rxq_destruct, \

  •     netdev_linux_rxq_dealloc, \

  •     netdev_linux_rxq_recv, \

  •     netdev_linux_rxq_wait, \

  •     netdev_linux_rxq_drain, \

  • }
  rc->class->run()调用的是netdev-linux.c下的netdev_linux_run
  netdev_linux_run会调用netlink的sock得到虚拟网卡的状态,并且更新状态。



  • error = nl_sock_recv(sock, &buf, false);

  • if (!error) {

  •     struct rtnetlink_change change;

  •     if (rtnetlink_parse(&buf, &change)) {

  •         struct netdev *netdev_ = netdev_from_name(change.ifname);

  •         if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {

  •            struct netdev_linux *netdev = netdev_linux_cast(netdev_);

  •            ovs_mutex_lock(&netdev->mutex);

  •            netdev_linux_update(netdev, &change);

  •            ovs_mutex_unlock(&netdev->mutex);

  •         }

  •         netdev_close(netdev_);

  •      }

  • }

运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦:870801961 群⑧:679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-387884-1-1.html 上篇帖子: 非常好!!!Linux源代码阅读——环境准备【转】 下篇帖子: Linux面试题,面试总结的
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表