(本篇使用openvswitch 2.7.2,由於ovs改版迅速,網路上許多資源的解釋跟現在已經有些變化了)
Openvswitch分為User Space跟Kernel Space兩部分
UserSpace主要有ovs-vswitchd跟ovsdb-server兩個process
分別負責跟controller溝通以及存放相關設定
跟controller連線的地方主要在connmgr.c裡面
其中定義了struct ofconn跟 ofconn_run這個function
ofconn_run會call handle_openflow這個function
/* Performs one round of periodic processing for 'ofconn'.
 *
 * In order: runs every scheduler in 'ofconn->schedulers' and hands the
 * resulting queue to do_send_packet_ins(), runs the underlying rconn, passes
 * up to 50 received messages to 'handle_openflow', logs flow_mods once the
 * report time has been reached, and finally (holding 'ofproto_mutex')
 * destroys the connection if its rconn is no longer alive or flushes it if
 * the rconn is alive but not connected.
 *
 * 'handle_openflow' is invoked once per received message; each message is
 * deleted here after the callback returns. */
static void
ofconn_run(struct ofconn *ofconn,
           void (*handle_openflow)(struct ofconn *,
                                   const struct ofpbuf *ofp_msg))
{
    struct connmgr *mgr = ofconn->connmgr;
    size_t sched_idx;
    size_t budget;

    for (sched_idx = 0; sched_idx < N_SCHEDULERS; sched_idx++) {
        struct ovs_list txq;

        pinsched_run(ofconn->schedulers[sched_idx], &txq);
        do_send_packet_ins(ofconn, &txq);
    }

    rconn_run(ofconn->rconn);

    /* Bound the work done per call so that other tasks are not starved:
     * at most 50 messages are received and handled. */
    for (budget = 50; budget > 0 && ofconn_may_recv(ofconn); budget--) {
        struct ofpbuf *of_msg = rconn_recv(ofconn->rconn);

        if (!of_msg) {
            break;
        }

        if (mgr->fail_open) {
            fail_open_maybe_recover(mgr->fail_open);
        }

        handle_openflow(ofconn, of_msg);
        ofpbuf_delete(of_msg);
    }

    if (time_msec() >= ofconn->next_op_report) {
        ofconn_log_flow_mods(ofconn);
    }

    ovs_mutex_lock(&ofproto_mutex);
    if (rconn_is_alive(ofconn->rconn)) {
        if (!rconn_is_connected(ofconn->rconn)) {
            ofconn_flush(ofconn);
        }
    } else {
        ofconn_destroy(ofconn);
    }
    ovs_mutex_unlock(&ofproto_mutex);
}
handle_openflow()會再call handle_openflow__()
handle_openflow__會判斷openflow packet的type
兩個function都在ofproto.c裡面
/* Handles the OpenFlow message 'ofp_msg' received on 'ofconn'.
 *
 * Delegates the real work to handle_openflow__().  If that returns a nonzero
 * error, an error reply built from the message data is sent back on
 * 'ofconn'.  The 'ofproto_recv_openflow' coverage counter is bumped in either
 * case. */
static void
handle_openflow(struct ofconn *ofconn, const struct ofpbuf *ofp_msg)
    OVS_EXCLUDED(ofproto_mutex)
{
    enum ofperr status;

    status = handle_openflow__(ofconn, ofp_msg);
    if (status != 0) {
        ofconn_send_error(ofconn, ofp_msg->data, status);
    }

    COVERAGE_INC(ofproto_recv_openflow);
}
/* Decodes the type of the OpenFlow message in 'msg' and dispatches it to the
 * matching handler.  Returns 0 on success, otherwise an OpenFlow error code.
 *
 * NOTE: this excerpt is intentionally truncated after the OFPTYPE_FLOW_MOD
 * case; the remaining cases and the end of the function are not shown. */
static enum ofperr
handle_openflow__(struct ofconn *ofconn, const struct ofpbuf *msg)
OVS_EXCLUDED(ofproto_mutex)
{
/* The message data starts with the OpenFlow header. */
const struct ofp_header *oh = msg->data;
enum ofptype type;
enum ofperr error;
/* Work out which kind of message this is; bail out if it cannot be
 * decoded. */
error = ofptype_decode(&type, oh);
if (error) {
return error;
}
if (oh->version >= OFP13_VERSION && ofpmsg_is_stat_request(oh)
&& ofpmp_more(oh)) {
/* We have no buffer implementation for multipart requests.
 * Report overflow for requests which consists of multiple
 * messages. */
return OFPERR_OFPBRC_MULTIPART_BUFFER_OVERFLOW;
}
switch (type) {
/* OpenFlow requests. */
case OFPTYPE_FLOW_MOD:
/* Flow table modifications are handled by handle_flow_mod(). */
return handle_flow_mod(ofconn, oh);
這裡只針對flowmod,所以會call handle_flow_mod()
handle_flow_mod()一樣在ofproto.c
/* Handles the flow_mod message 'oh' received on 'ofconn'.
 *
 * Rejects the request if reject_slave_controller() reports an error.
 * Otherwise decodes the message into an abstract 'struct ofputil_flow_mod'
 * with ofputil_decode_flow_mod() and, if decoding succeeds, passes it to
 * handle_flow_mod__().
 *
 * Returns 0 on success, otherwise the OpenFlow error code from whichever
 * step failed. */
static enum ofperr
handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh)
    OVS_EXCLUDED(ofproto_mutex)
{
    struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
    uint64_t stub[1024 / 8];
    struct ofputil_flow_mod flow_mod;
    struct ofpbuf actions_buf;
    enum ofperr err;

    err = reject_slave_controller(ofconn);
    if (err) {
        return err;
    }

    /* The decoded actions are stored in 'actions_buf', which starts out
     * backed by the on-stack 'stub'. */
    ofpbuf_use_stub(&actions_buf, stub, sizeof stub);

    err = ofputil_decode_flow_mod(&flow_mod, oh,
                                  ofconn_get_protocol(ofconn),
                                  ofproto_get_tun_tab(ofproto),
                                  &ofproto->vl_mff_map, &actions_buf,
                                  u16_to_ofp(ofproto->max_ports),
                                  ofproto->n_tables);
    if (err) {
        goto done;
    }

    {
        struct openflow_mod_requester requester = { ofconn, oh };

        err = handle_flow_mod__(ofproto, &flow_mod, &requester);
    }

done:
    /* Released on every path once the buffer has been set up. */
    ofpbuf_uninit(&actions_buf);
    return err;
}
這之中會call在ofp-util.c裡面的ofputil_decode_flow_mod()
/* Converts an OFPT_FLOW_MOD or NXT_FLOW_MOD message 'oh' into an abstract
 * flow_mod in 'fm'. Returns 0 if successful, otherwise an OpenFlow error
 * code.
 *
 * 'protocol' determines whether the 16-bit command field of OpenFlow 1.0 and
 * NXT flow_mods also carries a table ID (OFPUTIL_P_TID). 'tun_table' and
 * 'vl_mff_map' are handed to the match parsers ('vl_mff_map' also to the
 * instruction parser). 'max_port' and 'max_table' are handed to the final
 * ofpacts_check_consistency() call.
 *
 * Uses 'ofpacts' to store the abstract OFPACT_* version of 'oh''s actions.
 * The caller must initialize 'ofpacts' and retains ownership of it.
 * 'fm->ofpacts' will point into the 'ofpacts' buffer.
 *
 * Does not validate the flow_mod actions. The caller should do that, with
 * ofpacts_check().
 *
 * NOTE(review): despite the sentence above, the body ends by returning the
 * result of ofpacts_check_consistency(); confirm which statement is current
 * for this version. */
enum ofperr
ofputil_decode_flow_mod(struct ofputil_flow_mod *fm,
const struct ofp_header *oh,
enum ofputil_protocol protocol,
const struct tun_table *tun_table,
const struct vl_mff_map *vl_mff_map,
struct ofpbuf *ofpacts,
ofp_port_t max_port, uint8_t max_table)
{
ovs_be16 raw_flags;
enum ofperr error;
/* 'b' is a read-only view of the message, sized from the length field in
 * its header; the pieces of the message are pulled off it in turn. */
struct ofpbuf b = ofpbuf_const_initializer(oh, ntohs(oh->length));
enum ofpraw raw = ofpraw_pull_assert(&b);
if (raw == OFPRAW_OFPT11_FLOW_MOD) { /* Applies to OpenFlow 1.1 and later. */
/* Standard OpenFlow 1.1+ flow_mod. */
const struct ofp11_flow_mod *ofm;
ofm = ofpbuf_pull(&b, sizeof *ofm);
/* Parse the match out of the message. */
error = ofputil_pull_ofp11_match(&b, tun_table, vl_mff_map, &fm->match,
NULL);
if (error) {
return error;
}
/* Translate the message. */
fm->priority = ntohs(ofm->priority);
if (ofm->command == OFPFC_ADD
|| (oh->version == OFP11_VERSION
&& (ofm->command == OFPFC_MODIFY ||
ofm->command == OFPFC_MODIFY_STRICT)
&& ofm->cookie_mask == htonll(0))) {
/* In OpenFlow 1.1 only, a "modify" or "modify-strict" that does
 * not match on the cookie is treated as an "add" if there is no
 * match. */
fm->cookie = htonll(0);
fm->cookie_mask = htonll(0);
fm->new_cookie = ofm->cookie;
} else {
/* Otherwise the cookie fields only select existing flows, and
 * OVS_BE64_MAX marks "no new cookie". */
fm->cookie = ofm->cookie;
fm->cookie_mask = ofm->cookie_mask;
fm->new_cookie = OVS_BE64_MAX;
}
fm->modify_cookie = false;
fm->command = ofm->command;
/* Get table ID.
 *
 * OF1.1 entirely forbids table_id == OFPTT_ALL.
 * OF1.2+ allows table_id == OFPTT_ALL only for deletes. */
fm->table_id = ofm->table_id;
if (fm->table_id == OFPTT_ALL
&& (oh->version == OFP11_VERSION
|| (ofm->command != OFPFC_DELETE &&
ofm->command != OFPFC_DELETE_STRICT))) {
return OFPERR_OFPFMFC_BAD_TABLE_ID;
}
fm->idle_timeout = ntohs(ofm->idle_timeout);
fm->hard_timeout = ntohs(ofm->hard_timeout);
/* 'importance' is only taken from the message for OpenFlow 1.4+ adds;
 * every other case uses 0. */
if (oh->version >= OFP14_VERSION && ofm->command == OFPFC_ADD) {
fm->importance = ntohs(ofm->importance);
} else {
fm->importance = 0;
}
fm->buffer_id = ntohl(ofm->buffer_id);
error = ofputil_port_from_ofp11(ofm->out_port, &fm->out_port);
if (error) {
return error;
}
/* 'out_group' is only taken from the message for delete commands;
 * all other commands use OFPG_ANY. */
fm->out_group = (ofm->command == OFPFC_DELETE ||
ofm->command == OFPFC_DELETE_STRICT
? ntohl(ofm->out_group)
: OFPG_ANY);
raw_flags = ofm->flags;
} else { /* This branch covers the OpenFlow 1.0 flow_mod and the Nicira
          * extended flow_mod (NXT_FLOW_MOD). */
/* Raw 16-bit command in host byte order; split into command and table
 * ID further down. */
uint16_t command;
if (raw == OFPRAW_OFPT10_FLOW_MOD) {
/* Standard OpenFlow 1.0 flow_mod. */
const struct ofp10_flow_mod *ofm;
/* Get the ofp10_flow_mod. */
ofm = ofpbuf_pull(&b, sizeof *ofm);
/* Translate the rule. */
ofputil_match_from_ofp10_match(&ofm->match, &fm->match);
ofputil_normalize_match(&fm->match);
/* OpenFlow 1.0 says that exact-match rules have to have the
 * highest possible priority. */
fm->priority = (ofm->match.wildcards & htonl(OFPFW10_ALL)
? ntohs(ofm->priority)
: UINT16_MAX);
/* Translate the message. */
command = ntohs(ofm->command);
fm->cookie = htonll(0);
fm->cookie_mask = htonll(0);
fm->new_cookie = ofm->cookie;
fm->idle_timeout = ntohs(ofm->idle_timeout);
fm->hard_timeout = ntohs(ofm->hard_timeout);
fm->importance = 0;
fm->buffer_id = ntohl(ofm->buffer_id);
fm->out_port = u16_to_ofp(ntohs(ofm->out_port));
fm->out_group = OFPG_ANY;
raw_flags = ofm->flags;
} else if (raw == OFPRAW_NXT_FLOW_MOD) {
/* Nicira extended flow_mod. */
const struct nx_flow_mod *nfm;
/* Dissect the message. */
nfm = ofpbuf_pull(&b, sizeof *nfm);
/* The match parser also fills in the cookie and cookie mask. */
error = nx_pull_match(&b, ntohs(nfm->match_len),
&fm->match, &fm->cookie, &fm->cookie_mask,
tun_table, vl_mff_map);
if (error) {
return error;
}
/* Translate the message. */
command = ntohs(nfm->command);
if ((command & 0xff) == OFPFC_ADD && fm->cookie_mask) {
/* Flow additions may only set a new cookie, not match an
 * existing cookie. */
return OFPERR_NXBRC_NXM_INVALID;
}
fm->priority = ntohs(nfm->priority);
fm->new_cookie = nfm->cookie;
fm->idle_timeout = ntohs(nfm->idle_timeout);
fm->hard_timeout = ntohs(nfm->hard_timeout);
fm->importance = 0;
fm->buffer_id = ntohl(nfm->buffer_id);
fm->out_port = u16_to_ofp(ntohs(nfm->out_port));
fm->out_group = OFPG_ANY;
raw_flags = nfm->flags;
} else {
OVS_NOT_REACHED();
}
/* The cookie is modified unless 'new_cookie' is the OVS_BE64_MAX
 * sentinel. */
fm->modify_cookie = fm->new_cookie != OVS_BE64_MAX;
if (protocol & OFPUTIL_P_TID) {
/* With the table ID extension, the low 8 bits of 'command' are the
 * command and the high 8 bits are the table ID. */
fm->command = command & 0xff;
fm->table_id = command >> 8;
} else {
if (command > 0xff) {
VLOG_WARN_RL(&bad_ofmsg_rl, "flow_mod has explicit table_id "
"but flow_mod_table_id extension is not enabled");
}
/* Without the extension the whole value is the command and the
 * table ID is set to 0xff. */
fm->command = command;
fm->table_id = 0xff;
}
}
/* Anything above OFPFC_DELETE_STRICT is rejected as a bad command. */
if (fm->command > OFPFC_DELETE_STRICT) {
return OFPERR_OFPFMFC_BAD_COMMAND;
}
fm->ofpacts_tlv_bitmap = 0;
error = ofpacts_pull_openflow_instructions(&b, b.size, oh->version,
vl_mff_map,
&fm->ofpacts_tlv_bitmap,
ofpacts); /* Parse the instructions. */
if (error) {
return error;
}
fm->ofpacts = ofpacts->data; /* The decoded instructions end up here. */
fm->ofpacts_len = ofpacts->size;
error = ofputil_decode_flow_mod_flags(raw_flags, fm->command,
oh->version, &fm->flags);
if (error) {
return error;
}
if (fm->flags & OFPUTIL_FF_EMERG) {
/* We do not support the OpenFlow 1.0 emergency flow cache, which
 * is not required in OpenFlow 1.0.1 and removed from OpenFlow 1.1.
 *
 * OpenFlow 1.0 specifies the error code to use when idle_timeout
 * or hard_timeout is nonzero. Otherwise, there is no good error
 * code, so just state that the flow table is full. */
return (fm->hard_timeout || fm->idle_timeout
? OFPERR_OFPFMFC_BAD_EMERG_TIMEOUT
: OFPERR_OFPFMFC_TABLE_FULL);
}
/* Finally, check the decoded actions for errors; the result of that check
 * is this function's return value. */
return ofpacts_check_consistency(fm->ofpacts, fm->ofpacts_len,
&fm->match.flow, max_port,
fm->table_id, max_table, protocol);
}
最後補充一些struct的定義
在include/openvswitch/ofp-util.h
/* Protocol-independent flow_mod.
 *
 * The handling of cookies across multiple versions of OpenFlow is a bit
 * confusing. See DESIGN for the details.
 *
 * NOTE(review): ofputil_decode_flow_mod() as excerpted earlier in this
 * document also writes 'fm->ofpacts_tlv_bitmap', which this definition does
 * not declare; the two excerpts may come from different OVS versions --
 * confirm. */
struct ofputil_flow_mod {
struct ovs_list list_node; /* For queuing flow_mods. */
struct match match; /* Match decoded from the message. */
int priority; /* Flow priority, in host byte order. */
/* Cookie matching. The flow_mod affects only flows that have cookies that
 * bitwise match 'cookie' bits in positions where 'cookie_mask' has 1-bits.
 *
 * 'cookie_mask' should be zero for OFPFC_ADD flow_mods. */
ovs_be64 cookie; /* Cookie bits to match. */
ovs_be64 cookie_mask; /* 1-bit in each 'cookie' bit to match. */
/* Cookie changes.
 *
 * OFPFC_ADD uses 'new_cookie' as the new flow's cookie. 'new_cookie'
 * should not be UINT64_MAX.
 *
 * OFPFC_MODIFY and OFPFC_MODIFY_STRICT have two cases:
 *
 * - If one or more matching flows exist and 'modify_cookie' is true,
 * then the flow_mod changes the existing flows' cookies to
 * 'new_cookie'. 'new_cookie' should not be UINT64_MAX.
 *
 * - If no matching flow exists, 'new_cookie' is not UINT64_MAX, and
 * 'cookie_mask' is 0, then the flow_mod adds a new flow with
 * 'new_cookie' as its cookie.
 */
ovs_be64 new_cookie; /* New cookie to install or UINT64_MAX. */
bool modify_cookie; /* Set cookie of existing flow to 'new_cookie'? */
uint8_t table_id; /* Table ID; the decoder stores 0xff when the message
                   * carries none. */
uint16_t command; /* One of OFPFC_*. */
uint16_t idle_timeout; /* Idle timeout, in host byte order. */
uint16_t hard_timeout; /* Hard timeout, in host byte order. */
uint32_t buffer_id; /* Buffer ID, in host byte order. */
ofp_port_t out_port; /* Output port taken from the message. */
uint32_t out_group; /* Output group; the decoder uses OFPG_ANY unless
                     * the command is a delete. */
enum ofputil_flow_mod_flags flags; /* OFPUTIL_FF_* flags. */
uint16_t importance; /* Eviction precedence. */
struct ofpact *ofpacts; /* Series of "struct ofpact"s. */
size_t ofpacts_len; /* Length of ofpacts, in bytes. */
/* Reason for delete; ignored for non-delete commands */
enum ofp_flow_removed_reason delete_reason;
};
在lib/match.h裡
/* A flow match, as embedded in 'struct ofputil_flow_mod'.
 *
 * NOTE(review): the per-member descriptions below are inferred from the
 * member type names only -- confirm against lib/match.h. */
struct match {
struct flow flow; /* Presumably the field values to match. */
struct flow_wildcards wc; /* Presumably which bits of 'flow' matter. */
struct tun_metadata_allocation tun_md; /* Presumably tunnel metadata
                                        * bookkeeping for this match. */
};
在lib/flow.h
/* The fields of a flow, grouped into metadata, L2, L3 and L4 sections.
 *
 * Per the section comments below, the L2, L3 and L4 sections are each 64-bit
 * aligned, which is why explicit padding members ('pad1', 'pad3') appear.
 * 'igmp_group_ip4' must stay the last member (see its comment). */
struct flow {
/* Metadata */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
ovs_be64 metadata; /* OpenFlow Metadata. */
uint32_t regs[FLOW_N_REGS]; /* Registers. */
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
uint32_t dp_hash; /* Datapath computed hash value. The exact
* computation is opaque to the user space. */
union flow_in_port in_port; /* Input port.*/
uint32_t recirc_id; /* Must be exact match. */
uint16_t ct_state; /* Connection tracking state. */
uint16_t ct_zone; /* Connection tracking zone. */
uint32_t ct_mark; /* Connection mark.*/
uint8_t pad1[4]; /* Pad to 64 bits. */
ovs_u128 ct_label; /* Connection label. */
uint32_t conj_id; /* Conjunction ID. */
ofp_port_t actset_output; /* Output port in action set. */
/* L2, Order the same as in the Ethernet header! (64-bit aligned) */
struct eth_addr dl_dst; /* Ethernet destination address. */
struct eth_addr dl_src; /* Ethernet source address. */
ovs_be16 dl_type; /* Ethernet frame type. */
ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack
(with padding). */
/* L3 (64-bit aligned) */
ovs_be32 nw_src; /* IPv4 source address. */
ovs_be32 nw_dst; /* IPv4 destination address. */
struct in6_addr ipv6_src; /* IPv6 source address. */
struct in6_addr ipv6_dst; /* IPv6 destination address. */
ovs_be32 ipv6_label; /* IPv6 flow label. */
uint8_t nw_frag; /* FLOW_FRAG_* flags. */
uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
uint8_t nw_ttl; /* IP TTL/Hop Limit. */
uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */
struct eth_addr arp_sha; /* ARP/ND source hardware address. */
struct eth_addr arp_tha; /* ARP/ND target hardware address. */
ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */
ovs_be16 pad3; /* Pad to 64 bits. */
/* L4 (64-bit aligned) */
ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */
ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */
ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address.
* Keep last for BUILD_ASSERT_DECL below. */
};
在include/openvswitch/ofp-actions.h
/* Header of an abstract (OFPACT_*) action.
 *
 * Per the 'len' comment below, an action's length counts this struct itself,
 * so a series of actions (such as 'ofpacts' in 'struct ofputil_flow_mod')
 * is a sequence of variable-length records that each begin with one of
 * these. */
struct ofpact {
/* We want the space advantage of an 8-bit type here on every
 * implementation, without giving up the advantage of having a useful type
 * on implementations that support packed enums. */
#ifdef HAVE_PACKED_ENUM
enum ofpact_type type; /* OFPACT_*. */
#else
uint8_t type; /* OFPACT_* */
#endif
uint8_t raw; /* Original type when added, if any. */
uint16_t len; /* Length of the action, in bytes, including
* struct ofpact, excluding padding. */
};
/* Generic header of an OpenFlow action as it appears on the wire; all
 * members are in big-endian (network) byte order, as their ovs_be<N> types
 * indicate. */
struct ofp_action_header {
/* The meaning of other values of 'type' generally depends on the OpenFlow
 * version (see enum ofp_raw_action_type).
 *
 * Across all OpenFlow versions, OFPAT_VENDOR indicates that 'vendor'
 * designates an OpenFlow vendor ID and that the remainder of the action
 * structure has a vendor-defined meaning.
 */
#define OFPAT_VENDOR 0xffff
ovs_be16 type;
/* Length of the action. Always a multiple of 8. */
ovs_be16 len;
/* For type == OFPAT_VENDOR only, this is a vendor ID, e.g. NX_VENDOR_ID or
 * ONF_VENDOR_ID. Other 'type's use this space for some other purpose. */
ovs_be32 vendor;
};
/* The ovs_be<N> types indicate that an object is in big-endian, not
 * native-endian, byte order. They are otherwise equivalent to uint<N>_t.
 *
 * Values of these types are converted with ntohs()/ntohl() (or produced
 * with htonl()/htonll()) before being used as ordinary integers, as the
 * flow_mod decoder does for the fields it reads from a message. */
typedef uint16_t OVS_BITWISE ovs_be16;
typedef uint32_t OVS_BITWISE ovs_be32;
typedef uint64_t OVS_BITWISE ovs_be64;
最後補充一個用法
wireshark裡面action欄位看到的port如果要讀出來的話
就讀ofp_to_u16(ofpact_get_OUTPUT(fm->ofpacts)->port),其值為一個unsigned 16-bit整數(uint16_t);前提是fm->ofpacts的第一個action必須是OUTPUT
ref : http://jameslee-blog.logdown.com/posts/260303-open-vswitch-into-controller-calls-the-function
http://www.sdnlab.com/15786.html
留言列表