unregister a net device

unregister_netdev is used to delete a net device. In fact, it is equivalent to:

1
2
3
4
rtnl_lock();
rollback_registered_many( a temp list with a single net device);
list_add_tail(&dev->todo_list, &net_todo_list);
rtnl_unlock();

The temporary list stores the single net device that is to be deleted.
net_todo_list stores all the net devices that are being deleted.

The core function is rollback_registered_many, which efficiently deletes many devices in a list.
But in this case, there is only a single net device in the list.

Read More

draft: how to pick next task

summary

There are four sched_class instances:
stop_sched_class --> rt_sched_class --> fair_sched_class --> idle_sched_class

They are statically linked one after another through struct sched_class->next in their definitions.

Each sched_class has a pick_next_task method, which is used to select
the most suitable process to run from that sched_class's runqueue.

When we need to schedule, the four pick_next_task methods are called one by one.

As an optimization: most of the time there is no rt task in the running state,
and in this case we can call fair_sched_class's pick_next_task directly.

Read More

draft: isolcpus

Isolcpus

kthreads have the wrong affinity when isolcpus is used on the boot command line

When the kernel is booted with isolcpus on the grub command line, only the init thread has the expected affinity, which excludes the isolated cpus,

while the kthreads' affinity still includes the isolated cpus.

Read More

PROMISC in net device->flag

summary

promisc is one bit of struct net_device’s flags field, which indicates whether a device is in promisc status.

1
2
3
4
5
6
7
8
9
10
11
12
30 /* Standard interface flags (netdevice->flags). */
31 #define IFF_UP 0x1 /* interface is up */
32 #define IFF_BROADCAST 0x2 /* broadcast address valid */
33 #define IFF_DEBUG 0x4 /* turn on debugging */
34 #define IFF_LOOPBACK 0x8 /* is a loopback net */
35 #define IFF_POINTOPOINT 0x10 /* interface is has p-p link */
36 #define IFF_NOTRAILERS 0x20 /* avoid use of trailers */
37 #define IFF_RUNNING 0x40 /* interface RFC2863 OPER_UP */
38 #define IFF_NOARP 0x80 /* no ARP protocol */
39 #define IFF_PROMISC 0x100 /* receive all packets */
40 #define IFF_ALLMULTI 0x200 /* receive all multicast packets*/
...

There are two kinds of operations that can cause a NIC to enter/leave promisc status.

  1. ip command
    Even if the on command is run multiple times, just one off is needed to recover.
1
2
  	ip link set dev eth0 promisc on
ip link set dev eth0 promisc off
  2. tcpdump command
    When tcpdump starts, it puts the dev into promisc mode,
    and just before exiting, tcpdump makes the dev leave promisc mode.
    All of this is done by calling the kernel API dev_set_promiscuity.

Read More

register_pernet_subsys 笔记

pernet ops

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
* register_pernet_subsys - register a network namespace subsystem
* @ops: pernet operations structure for the subsystem
*
* Register a subsystem which has init and exit functions
* that are called when network namespaces are created and
* destroyed respectively.
*
* When registered all network namespace init functions are
* called for every existing network namespace. Allowing kernel
* modules to have a race free view of the set of network namespaces.
*
* When a new network namespace is created all of the init
* methods are called in the order in which they were registered.
*
* When a network namespace is destroyed all of the exit methods
* are called in the reverse of the order with which they were
* registered.
*/
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
mutex_lock(&net_mutex);
error = register_pernet_operations(first_device, ops);
mutex_unlock(&net_mutex);
return error;
}
===>static int register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
error = __register_pernet_operations(list, ops);
}

======>#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
LIST_HEAD(net_exit_list);

list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
for_each_net(net) {
error = ops_init(ops, net);
if (error)
goto out_undo;
list_add_tail(&net->exit_list, &net_exit_list);
}
}
return 0;


========>#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
struct net *net;
int error;
LIST_HEAD(net_exit_list);

list_add_tail(&ops->list, list);
if (ops->init || (ops->id && ops->size)) {
for_each_net(net) {
=============> error = ops_init(ops, net);
if (error)
goto out_undo;
=============> list_add_tail(&net->exit_list, &net_exit_list);<<< confused?!!! net_exit_list局部变量?

}
}
return 0;


=============>static int ops_init(const struct pernet_operations *ops, struct net *net)
{
int err;
if (ops->id && ops->size) {
void *data = kzalloc(ops->size, GFP_KERNEL);
if (!data)
return -ENOMEM;

err = net_assign_generic(net, *ops->id, data);
if (err) {
kfree(data);
return err;
}
}
if (ops->init)
return ops->init(net);<====== the ops->init will be called.
return 0;
}

For example

inet6_init in pernet.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static struct pernet_operations inet6_net_ops = { 
.init = inet6_net_init,
.exit = inet6_net_exit,
};
static int __init inet6_init(void)
{
.....
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
.....
}
call: ops->init(net);<====== the ops->init will be called.
equal with ======= .init = inet6_net_init,
inet6_net_init(net);