The commit_lb_aff action translates to an OpenFlow "learn" action that inserts a new flow in the OFTABLE_CHK_LB_AFFINITY table. The new flow is used to match on the 5-tuple and set the REGBIT_KNOWN_LB_SESSION bit. Moreover, the new flow stores the backend IP and port in registers REG4 and REG8[0..15] respectively.
Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> --- controller/lflow.h | 1 + include/ovn/actions.h | 15 ++ include/ovn/logical-fields.h | 3 + lib/actions.c | 370 +++++++++++++++++++++++++++++++++++ ovn-sb.xml | 35 ++++ tests/ovn.at | 10 + utilities/ovn-trace.c | 2 + 7 files changed, 436 insertions(+) diff --git a/controller/lflow.h b/controller/lflow.h index 8cbe312ca..4be079555 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -79,6 +79,7 @@ struct uuid; #define OFTABLE_CHK_OUT_PORT_SEC 75 #define OFTABLE_ECMP_NH_MAC 76 #define OFTABLE_ECMP_NH 77 +#define OFTABLE_CHK_LB_AFFINITY 78 enum ref_type { REF_TYPE_ADDRSET, diff --git a/include/ovn/actions.h b/include/ovn/actions.h index d7ee84dac..597cbb8e3 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -121,6 +121,7 @@ struct ovn_extend_table; OVNACT(COMMIT_ECMP_NH, ovnact_commit_ecmp_nh) \ OVNACT(CHK_ECMP_NH_MAC, ovnact_result) \ OVNACT(CHK_ECMP_NH, ovnact_result) \ + OVNACT(COMMIT_LB_AFF, ovnact_commit_lb_aff) \ /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */ enum OVS_PACKED_ENUM ovnact_type { @@ -463,6 +464,20 @@ struct ovnact_commit_ecmp_nh { uint8_t proto; }; +/* OVNACT_COMMIT_LB_AFF. */ +struct ovnact_commit_lb_aff { + struct ovnact ovnact; + + struct in6_addr vip; + uint16_t vip_port; + uint8_t proto; + + struct in6_addr backend; + uint16_t backend_port; + + uint16_t timeout; +}; + /* Internal use by the helpers below. 
*/ void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h index 3db7265e4..52f40de38 100644 --- a/include/ovn/logical-fields.h +++ b/include/ovn/logical-fields.h @@ -71,6 +71,7 @@ enum mff_log_flags_bits { MLF_USE_SNAT_ZONE = 11, MLF_CHECK_PORT_SEC_BIT = 12, MLF_LOOKUP_COMMIT_ECMP_NH_BIT = 13, + MLF_COMMIT_LB_AFF_BIT = 14, }; /* MFF_LOG_FLAGS_REG flag assignments */ @@ -116,6 +117,8 @@ enum mff_log_flags { MLF_LOCALPORT = (1 << MLF_LOCALPORT_BIT), MLF_LOOKUP_COMMIT_ECMP_NH = (1 << MLF_LOOKUP_COMMIT_ECMP_NH_BIT), + + MLF_COMMIT_LB_AFF = (1 << MLF_COMMIT_LB_AFF_BIT), }; /* OVN logical fields diff --git a/lib/actions.c b/lib/actions.c index adbb42db4..00ab9a787 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -4600,6 +4600,374 @@ encode_CHK_ECMP_NH(const struct ovnact_result *res, MLF_LOOKUP_COMMIT_ECMP_NH_BIT, ofpacts); } +static void +parse_commit_lb_aff(struct action_context *ctx, + struct ovnact_commit_lb_aff *lb_aff) +{ + uint16_t timeout, port = 0; + char *ip_str; + int family; + + lexer_force_match(ctx->lexer, LEX_T_LPAREN); /* Skip '('. 
*/ + if (!lexer_match_id(ctx->lexer, "vip")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (ctx->lexer->token.type != LEX_T_STRING) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, + &port, &family)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (family == AF_INET) { + ovs_be32 ip4; + ip_parse(ip_str, &ip4); + in6_addr_set_mapped_ipv4(&lb_aff->vip, ip4); + } else { + ipv6_parse(ip_str, &lb_aff->vip); + } + + lb_aff->vip_port = port; + free(ip_str); + + lexer_get(ctx->lexer); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + + if (!lexer_match_id(ctx->lexer, "backend")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (ctx->lexer->token.type != LEX_T_STRING) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!ip_address_and_port_from_lb_key(ctx->lexer->token.s, &ip_str, + &port, &family)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (family == AF_INET) { + ovs_be32 ip4; + ip_parse(ip_str, &ip4); + in6_addr_set_mapped_ipv4(&lb_aff->backend, ip4); + } else { + ipv6_parse(ip_str, &lb_aff->backend); + } + + lb_aff->backend_port = port; + free(ip_str); + + lexer_get(ctx->lexer); + lexer_force_match(ctx->lexer, LEX_T_COMMA); + + if (lb_aff->vip_port) { + if (!lexer_match_id(ctx->lexer, "proto")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + + if (lexer_match_id(ctx->lexer, "tcp")) { + lb_aff->proto = IPPROTO_TCP; + } else if 
(lexer_match_id(ctx->lexer, "udp")) { + lb_aff->proto = IPPROTO_UDP; + } else if (lexer_match_id(ctx->lexer, "sctp")) { + lb_aff->proto = IPPROTO_SCTP; + } else { + lexer_syntax_error(ctx->lexer, "invalid protocol"); + return; + } + lexer_force_match(ctx->lexer, LEX_T_COMMA); + } + + if (!lexer_match_id(ctx->lexer, "timeout")) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) { + lexer_syntax_error(ctx->lexer, "invalid parameter"); + return; + } + if (!action_parse_uint16(ctx, &timeout, "affinity timeout")) { + return; + } + lb_aff->timeout = timeout; + + lexer_force_match(ctx->lexer, LEX_T_RPAREN); /* Skip ')'. */ + +} + +static void +format_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, struct ds *s) +{ + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->vip)) { + char ip_str[INET6_ADDRSTRLEN] = {}; + inet_ntop(AF_INET6, &lb_aff->vip, ip_str, INET6_ADDRSTRLEN); + ds_put_format(s, "commit_lb_aff(vip = \"[%s]", ip_str); + } else { + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->vip); + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); + ds_put_format(s, "commit_lb_aff(vip = \"%s", ip_str); + free(ip_str); + } + if (lb_aff->vip_port) { + ds_put_format(s, ":%d", lb_aff->vip_port); + } + ds_put_cstr(s, "\""); + + if (!IN6_IS_ADDR_V4MAPPED(&lb_aff->backend)) { + char ip_str[INET6_ADDRSTRLEN] = {}; + inet_ntop(AF_INET6, &lb_aff->backend, ip_str, INET6_ADDRSTRLEN); + ds_put_format(s, ", backend = \"[%s]", ip_str); + } else { + ovs_be32 ip = in6_addr_get_mapped_ipv4(&lb_aff->backend); + char *ip_str = xasprintf(IP_FMT, IP_ARGS(ip)); + ds_put_format(s, ", backend = \"%s", ip_str); + free(ip_str); + } + if (lb_aff->backend_port) { + ds_put_format(s, ":%d", lb_aff->backend_port); + } + ds_put_cstr(s, "\""); + + if (lb_aff->proto) { + const char *proto; + switch (lb_aff->proto) { + case IPPROTO_UDP: + proto = "udp"; + break; + case IPPROTO_SCTP: + proto = "sctp"; + break; + case IPPROTO_TCP: + default: + proto 
= "tcp"; + break; + } + ds_put_format(s, ", proto = %s", proto); + } + ds_put_format(s, ", timeout = %d);", lb_aff->timeout); +} + +static void +encode_COMMIT_LB_AFF(const struct ovnact_commit_lb_aff *lb_aff, + const struct ovnact_encode_params *ep OVS_UNUSED, + struct ofpbuf *ofpacts) +{ + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&lb_aff->vip); + size_t ol_offset = ofpacts->size; + struct ofpact_learn *ol = ofpact_put_LEARN(ofpacts); + struct match match = MATCH_CATCHALL_INITIALIZER; + struct ofpact_learn_spec *ol_spec; + unsigned int imm_bytes; + uint8_t *src_imm; + + ol->flags = NX_LEARN_F_DELETE_LEARNED; + ol->idle_timeout = lb_aff->timeout; /* seconds. */ + ol->priority = OFP_DEFAULT_PRIORITY; + ol->table_id = OFTABLE_CHK_LB_AFFINITY; + + /* Match on the same ETH type as the packet that created the new table. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_ETH_TYPE); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + union mf_value imm_eth_type = { + .be16 = ipv6 ? htons(ETH_TYPE_IPV6) : htons(ETH_TYPE_IP) + }; + mf_write_subfield_value(&ol_spec->dst, &imm_eth_type, &match); + /* Push value last, as this may reallocate 'ol_spec'. */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_eth_type, imm_bytes); + + /* IP src. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); + ol_spec->src.field = + ipv6 ? mf_from_id(MFF_IPV6_SRC) : mf_from_id(MFF_IPV4_SRC); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_FIELD; + + /* IP dst. 
*/ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = + ipv6 ? mf_from_id(MFF_IPV6_DST) : mf_from_id(MFF_IPV4_DST); + union mf_value imm_ip; + if (ipv6) { + imm_ip = (union mf_value) { + .ipv6 = lb_aff->vip, + }; + } else { + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->vip); + imm_ip = (union mf_value) { + .be32 = ip4, + }; + } + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + mf_write_subfield_value(&ol_spec->dst, &imm_ip, &match); + + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_ip, imm_bytes); + + if (lb_aff->proto) { + /* IP proto. */ + union mf_value imm_proto = { + .u8 = lb_aff->proto, + }; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_IP_PROTO); + ol_spec->src.field = mf_from_id(MFF_IP_PROTO); + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + mf_write_subfield_value(&ol_spec->dst, &imm_proto, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_proto, imm_bytes); + + /* dst port */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + switch (lb_aff->proto) { + case IPPROTO_TCP: + ol_spec->dst.field = mf_from_id(MFF_TCP_DST); + ol_spec->src.field = mf_from_id(MFF_TCP_DST); + break; + case IPPROTO_UDP: + ol_spec->dst.field = mf_from_id(MFF_UDP_DST); + ol_spec->src.field = mf_from_id(MFF_UDP_DST); + break; + case IPPROTO_SCTP: + ol_spec->dst.field = 
mf_from_id(MFF_SCTP_DST); + ol_spec->src.field = mf_from_id(MFF_SCTP_DST); + break; + default: + OVS_NOT_REACHED(); + break; + } + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_MATCH; + ol_spec->src_type = NX_LEARN_SRC_FIELD; + } + + /* Learned flow loads 1 into MLF_COMMIT_LB_AFF_BIT of MFF_LOG_FLAGS. */ + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + ol_spec->dst.field = mf_from_id(MFF_LOG_FLAGS); + ol_spec->dst.ofs = MLF_COMMIT_LB_AFF_BIT; + ol_spec->dst.n_bits = 1; + ol_spec->n_bits = ol_spec->dst.n_bits; + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + union mf_value imm_reg_value = { + .u8 = 1 + }; + mf_write_subfield_value(&ol_spec->dst, &imm_reg_value, &match); + + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + ol = ofpacts->header; + memcpy(src_imm, &imm_reg_value, imm_bytes); + + /* Load backend IP in REG4/XXREG1. 
*/ + union mf_value imm_backend_ip; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + + if (ipv6) { + imm_backend_ip = (union mf_value) { + .ipv6 = lb_aff->backend, + }; + ol_spec->dst.field = mf_from_id(MFF_XXREG1); + } else { + ovs_be32 ip4 = in6_addr_get_mapped_ipv4(&lb_aff->backend); + imm_backend_ip = (union mf_value) { + .be32 = ip4, + }; + ol_spec->dst.field = mf_from_id(MFF_REG4); + } + + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = ol_spec->dst.field->n_bits; + ol_spec->n_bits = ol_spec->dst.n_bits; + mf_write_subfield_value(&ol_spec->dst, &imm_backend_ip, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_backend_ip, imm_bytes); + + if (lb_aff->backend_port) { + /* Load backend port in REG8. */ + union mf_value imm_backend_port; + ol_spec = ofpbuf_put_zeros(ofpacts, sizeof *ol_spec); + imm_backend_port = (union mf_value) { + .be16 = htons(lb_aff->backend_port), + }; + + ol_spec->dst.field = mf_from_id(MFF_REG8); + ol_spec->dst_type = NX_LEARN_DST_LOAD; + ol_spec->src_type = NX_LEARN_SRC_IMMEDIATE; + ol_spec->dst.ofs = 0; + ol_spec->dst.n_bits = 8 * sizeof(lb_aff->backend_port); + ol_spec->n_bits = ol_spec->dst.n_bits; + mf_write_subfield_value(&ol_spec->dst, &imm_backend_port, &match); + /* Push value last, as this may reallocate 'ol_spec' */ + imm_bytes = DIV_ROUND_UP(ol_spec->dst.n_bits, 8); + src_imm = ofpbuf_put_zeros(ofpacts, OFPACT_ALIGN(imm_bytes)); + memcpy(src_imm, &imm_backend_port, imm_bytes); + } + + ol = ofpbuf_at_assert(ofpacts, ol_offset, sizeof *ol); + ofpact_finish_LEARN(ofpacts, &ol); +} + +static void +ovnact_commit_lb_aff_free(struct ovnact_commit_lb_aff *ecmp_nh OVS_UNUSED) +{ +} + /* Parses an assignment or exchange or put_dhcp_opts action. 
*/ static void parse_set_action(struct action_context *ctx) @@ -4790,6 +5158,8 @@ parse_action(struct action_context *ctx) parse_put_fdb(ctx, ovnact_put_PUT_FDB(ctx->ovnacts)); } else if (lexer_match_id(ctx->lexer, "commit_ecmp_nh")) { parse_commit_ecmp_nh(ctx, ovnact_put_COMMIT_ECMP_NH(ctx->ovnacts)); + } else if (lexer_match_id(ctx->lexer, "commit_lb_aff")) { + parse_commit_lb_aff(ctx, ovnact_put_COMMIT_LB_AFF(ctx->ovnacts)); } else { lexer_syntax_error(ctx->lexer, "expecting action"); } diff --git a/ovn-sb.xml b/ovn-sb.xml index 315d60853..2c0b8d93f 100644 --- a/ovn-sb.xml +++ b/ovn-sb.xml @@ -2624,6 +2624,41 @@ tcp.flags = RST; register <var>R</var> is set to 1. </p> </dd> + + <dt> + <code> + commit_lb_aff(<var>vip</var>, <var>backend</var>, + <var>proto</var>, <var>timeout</var>); + </code> + </dt> + <dd> + <p> + <b>Parameters</b>: load-balancer virtual ip:port <var>vip</var>, + load-balancer backend ip:port <var>backend</var>, load-balancer + protocol <var>proto</var>, affinity timeout <var>timeout</var>. + </p> + + <p> + This action translates to an openflow "learn" action that inserts + a new flow in table 78. + </p> + + <ul> + <li> + Match on the 4-tuple in table 78: <code>nw_src=ip client</code>, + <code>nw_dst=vip ip</code>, <code>ip_proto</code>, + <code>tp_dst=vip port</code> and set <code>reg9[6]</code> to 1, + <code>reg4</code> and <code>reg8</code> to backend ip and port + respectively. For IPv6 register <code>xxreg1</code> is used to + store the backend ip. + </li> + </ul> + + <p> + This action is applied for new connection received by a specific + load-balancer. 
+ </p> + </dd> </dl> </column> diff --git a/tests/ovn.at b/tests/ovn.at index f8b8db4df..1678f8e30 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2125,6 +2125,16 @@ reg9[5] = chk_ecmp_nh_mac(); reg9[5] = chk_ecmp_nh(); encodes as set_field:0/0x2000->reg10,resubmit(,77),move:NXM_NX_REG10[13]->OXM_OF_PKT_REG4[5] +# commit_lb_aff +commit_lb_aff(vip = "172.16.0.123:8080", backend = "10.0.0.3:8080", proto = tcp, timeout = 30); + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[],load:0x1f90->NXM_NX_REG8[0..15]) + +commit_lb_aff(vip = "172.16.0.123", backend = "10.0.0.3", timeout = 30); + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x800,NXM_OF_IP_SRC[],ip_dst=172.16.0.123,load:0x1->NXM_NX_REG10[14],load:0xa000003->NXM_NX_REG4[]) + +commit_lb_aff(vip = "[::1]:8080", backend = "[::2]:8080", proto = tcp, timeout = 30); + encodes as learn(table=78,idle_timeout=30,delete_learned,eth_type=0x86dd,NXM_NX_IPV6_SRC[],ipv6_dst=::1,nw_proto=6,NXM_OF_TCP_DST[],load:0x1->NXM_NX_REG10[14],load:0x2->NXM_NX_XXREG1[],load:0x1f90->NXM_NX_REG8[0..15]) + # push/pop push(xxreg0);push(xxreg1[10..20]);push(eth.src);pop(xxreg0[0..47]);pop(xxreg0[48..57]);pop(xxreg1); formats as push(xxreg0); push(xxreg1[10..20]); push(eth.src); pop(xxreg0[0..47]); pop(xxreg0[48..57]); pop(xxreg1); diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c index d9e7129d9..5adfd2521 100644 --- a/utilities/ovn-trace.c +++ b/utilities/ovn-trace.c @@ -3298,6 +3298,8 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, break; case OVNACT_CHK_ECMP_NH: break; + case OVNACT_COMMIT_LB_AFF: + break; } } ofpbuf_uninit(&stack); -- 2.37.3 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev