For a packet destined for the local host, ip_local_deliver_finish
is the last function called by the network layer.
ip_local_deliver_finish
invokes the protocol handler stored in the element of the inet_protos
array that is selected by the protocol
value in the IPv4 header.
IPv6 is very similar to IPv4, except that the names are slightly different.
Data structure for IPv4 net_protocol
38 #define MAX_INET_PROTOS 256
39
40
41 struct net_protocol {
42     void (*early_demux)(struct sk_buff *skb);
43     int (*handler)(struct sk_buff *skb);
44     void (*err_handler)(struct sk_buff *skb, u32 info);
45     unsigned int no_policy:1,
46                  netns_ok:1;
47 };
The handler
is the main part; it is called to process the upper-layer protocol carried over IPv4.
For example, for TCP over IPv4 the handler is tcp_v4_rcv.
1 2 3 4 5 6 7 1540 static const struct net_protocol tcp_protocol = { 1541 .early_demux = tcp_v4_early_demux, 1542 .handler = tcp_v4_rcv, 1543 .err_handler = tcp_v4_err, 1544 .no_policy = 1, 1545 .netns_ok = 1, 1546 };
Pointer array for all protocols: there is a pointer array, inet_protos,
that collects all the protocols supported by IPv4.
1 2 31 const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; 32 const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
The ‘ADD’ and ‘DEL’ operations are done by inet_add_protocol
and inet_del_protocol.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 34 /* 35 * Add a protocol handler to the hash tables 36 */ 37 38 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) 39 { 40 if (!prot->netns_ok) { 41 pr_err("Protocol %u is not namespace aware, cannot register.\n", 42 protocol); 43 return -EINVAL; 44 } 45 46 return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], 47 NULL, prot) ? 0 : -1; 48 } 49 EXPORT_SYMBOL(inet_add_protocol);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 58 /* 59 * Remove a protocol from the hash tables. 60 */ 61 62 int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) 63 { 64 int ret; 65 66 ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], 67 prot, NULL) == prot) ? 0 : -1; 68 69 synchronize_net(); 70 71 return ret; 72 } 73 EXPORT_SYMBOL(inet_del_protocol); 74
All registered protocol handlers:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 $ grep inet_add_protocol net/ -Rw net/dccp/ipv4.c: err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); net/sctp/protocol.c: if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) net/l2tp/l2tp_ip.c: err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP); net/ipv4/ipmr.c: if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { net/ipv4/esp4.c: if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) { net/ipv4/protocol.c:int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) net/ipv4/protocol.c:EXPORT_SYMBOL(inet_add_protocol); net/ipv4/af_inet.c: if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) net/ipv4/af_inet.c: if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) net/ipv4/af_inet.c: if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) net/ipv4/af_inet.c: if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) net/ipv4/ah4.c: if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) { net/ipv4/tunnel4.c: if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) { net/ipv4/tunnel4.c: if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { net/ipv4/ipcomp.c: if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) { net/ipv4/udplite.c: if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) net/ipv4/gre_demux.c: if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
Register protocol handlers for tcp, udp and icmp. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 1670 static int __init inet_init(void) 1671 { ... 1710 /* 1711 * Add all the base protocols. 1712 */ 1713 1714 if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) 1715 pr_crit("%s: Cannot add ICMP protocol\n", __func__); 1716 if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) 1717 pr_crit("%s: Cannot add UDP protocol\n", __func__); 1718 if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) 1719 pr_crit("%s: Cannot add TCP protocol\n", __func__); ...
Call the protocol handler at the end of IPv4 processing. NOTE:
no_policy:
if it is set, the xfrm4_policy_check
in ip_local_deliver_finish is skipped and the check is left to the handler itself, as is the case for tcp
and udp.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 190 static int ip_local_deliver_finish(struct sk_buff *skb) 191 { 192 struct net *net = dev_net(skb->dev); 193 194 __skb_pull(skb, skb_network_header_len(skb)); 195 196 rcu_read_lock(); 197 { 198 int protocol = ip_hdr(skb)->protocol; 199 const struct net_protocol *ipprot; 200 int raw; 201 202 resubmit: 203 raw = raw_local_deliver(skb, protocol); 204 205 ipprot = rcu_dereference(inet_protos[protocol]); 206 if (ipprot != NULL) { 207 int ret; 208 209 if (!ipprot->no_policy) { 210 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 211 kfree_skb(skb); 212 goto out; 213 } 214 nf_reset(skb); 215 } 216 ret = ipprot->handler(skb); 217 if (ret < 0) { 218 protocol = -ret; 219 goto resubmit; 220 } 221 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); 222 } else { 223 if (!raw) { 224 if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 225 IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); 226 icmp_send(skb, ICMP_DEST_UNREACH, 227 ICMP_PROT_UNREACH, 0); 228 } 229 kfree_skb(skb); 230 } else { 231 IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); 232 consume_skb(skb); 233 } 234 } 235 } 236 out: 237 rcu_read_unlock(); 238 239 return 0; 240 }
IPv6 part: struct inet6_protocol
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 49 #if IS_ENABLED(CONFIG_IPV6) 50 struct inet6_protocol { 51 void (*early_demux)(struct sk_buff *skb); 52 53 int (*handler)(struct sk_buff *skb); 54 55 void (*err_handler)(struct sk_buff *skb, 56 struct inet6_skb_parm *opt, 57 u8 type, u8 code, int offset, 58 __be32 info); 59 unsigned int flags; /* INET6_PROTO_xxx */ 60 }; 61 62 #define INET6_PROTO_NOPOLICY 0x1 63 #define INET6_PROTO_FINAL 0x2 64 #endif
ADD and DEL 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 29 const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; 30 EXPORT_SYMBOL(inet6_protos); 31 32 int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 33 { 34 return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], 35 NULL, prot) ? 0 : -1; 36 } 37 EXPORT_SYMBOL(inet6_add_protocol); 38 39 /* 40 * Remove a protocol from the hash tables. 41 */ 42 43 int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) 44 { 45 int ret; 46 47 ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], 48 prot, NULL) == prot) ? 0 : -1; 49 50 synchronize_net(); 51 52 return ret; 53 } 54 EXPORT_SYMBOL(inet6_del_protocol);
protocol handlers 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 $ grep inet6_add_protocol net/ -Rw net/dccp/ipv6.c: err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); net/sctp/ipv6.c: if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) net/ipv6/exthdrs.c: ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); net/ipv6/exthdrs.c: ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); net/ipv6/exthdrs.c: ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE); net/ipv6/icmp.c: if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) net/ipv6/ip6mr.c: if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) { net/ipv6/esp6.c: if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { net/ipv6/protocol.c:int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) net/ipv6/protocol.c:EXPORT_SYMBOL(inet6_add_protocol); net/ipv6/ip6_gre.c: err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); net/ipv6/reassembly.c: ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); net/ipv6/ipcomp6.c: if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { net/ipv6/udp.c: ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); net/ipv6/ah6.c: if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { net/ipv6/tcp_ipv6.c: ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); net/ipv6/udplite.c: ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); net/ipv6/tunnel6.c: if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { net/ipv6/tunnel6.c: if (inet6_add_protocol(&tunnel46_protocol, IPPROTO_IPIP)) { net/l2tp/l2tp_ip6.c: err = inet6_add_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
1 2 3 4 5 6 1951 static const struct inet6_protocol tcpv6_protocol = { 1952 .early_demux = tcp_v6_early_demux, 1953 .handler = tcp_v6_rcv, 1954 .err_handler = tcp_v6_err, 1955 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, 1956 };
call protocol handler in the end of ipv6 process. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 200 201 static int ip6_input_finish(struct sk_buff *skb) 202 { ... 222 raw = raw6_local_deliver(skb, nexthdr); 223 if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { 224 int ret; 225 226 if (ipprot->flags & INET6_PROTO_FINAL) { 227 const struct ipv6hdr *hdr; 228 229 /* Free reference early: we don't need it any more, 230 and it may hold ip_conntrack module loaded 231 indefinitely. */ 232 nf_reset(skb); 233 234 skb_postpull_rcsum(skb, skb_network_header(skb), 235 skb_network_header_len(skb)); 236 hdr = ipv6_hdr(skb); 237 if (ipv6_addr_is_multicast(&hdr->daddr) && 238 !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, 239 &hdr->saddr) && 240 !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) 241 goto discard; 242 } 243 if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && 244 !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 245 goto discard; 246 247 ret = ipprot->handler(skb); 248 if (ret > 0) 249 goto resubmit; 250 else if (ret == 0) 251 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); 252 } else { ...