Summary

Part 1: Register a multiqueue net device. In this part, only the qdisc framework is prepared; noop_qdisc is set as the default.
Prepare the netdev_queues. For example, the Intel igb hardware has 8 hardware TX queues, so the NIC driver creates 8 corresponding struct netdev_queue entries in the _tx array of struct net_device (see the accessor sketch below).
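For reference, the kernel's accessor for these queues is just an array index into dev->_tx, from include/linux/netdevice.h:

```c
/* Look up the nth TX queue of a device: a plain index into dev->_tx,
 * which is allocated in netif_alloc_netdev_queues(). */
static inline
struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
					  unsigned int index)
{
	return &dev->_tx[index];
}
```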
Prepare the mq_qdisc. The mq_qdisc is attached to the corresponding device. In the mq_qdisc private field, a default qdisc is created for each of the NIC's hardware queues; this is done in mq_init. The default qdisc is pfifo_fast_ops (see the mq_init sketch below).
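A paraphrased sketch of mq_init from net/sched/sch_mq.c of roughly this kernel generation (treat it as a sketch; flag names and details vary slightly between versions):

```c
static int mq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct mq_sched *priv = qdisc_priv(sch);
	struct netdev_queue *dev_queue;
	struct Qdisc *qdisc;
	unsigned int ntx;

	if (sch->parent != TC_H_ROOT)
		return -EOPNOTSUPP;

	if (!netif_is_multiqueue(dev))
		return -EOPNOTSUPP;

	/* pre-allocate qdiscs, attachment can't fail */
	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
			       GFP_KERNEL);
	if (priv->qdiscs == NULL)
		return -ENOMEM;

	/* one default (pfifo_fast) qdisc per hardware TX queue */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		dev_queue = netdev_get_tx_queue(dev, ntx);
		qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(ntx + 1)));
		if (qdisc == NULL)
			goto err;
		priv->qdiscs[ntx] = qdisc;
		qdisc->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->flags |= TCQ_F_MQROOT;
	return 0;

err:
	mq_destroy(sch);
	return -ENOMEM;
}
```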
Attach the mq_qdisc to the netdev_queues. In mq_attach, these per-queue qdiscs are attached to the corresponding struct netdev_queue (sketched below).
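A sketch of mq_attach from the same file; it hands each pre-allocated sub-qdisc to its queue via dev_graft_qdisc (again, a sketch for this kernel generation):

```c
static void mq_attach(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct mq_sched *priv = qdisc_priv(sch);
	struct Qdisc *qdisc;
	unsigned int ntx;

	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		qdisc = priv->qdiscs[ntx];
		/* graft returns the old qdisc of the queue, if any */
		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
		if (qdisc)
			qdisc_destroy(qdisc);
	}
	kfree(priv->qdiscs);
	priv->qdiscs = NULL;
}
```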
Part 2: Activate a net device with the right qdiscs. Here we only trace the mq_qdisc case. When the device is brought up, dev_open is called, which in turn calls dev_activate.
2.1 A qdisc with mq_qdisc_ops is allocated and assigned to dev->qdisc. The mq qdisc is not a simple qdisc: it holds sub-qdiscs in its private field, and each sub-qdisc is a default qdisc (pfifo_fast).
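For reference, pfifo_fast's ops table in net/sched/sch_generic.c of this kernel generation looks like this (a three-band FIFO selected by packet priority):

```c
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	sizeof(struct pfifo_fast_priv),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};
```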
2.2 When the qdisc is attached to the device via qdisc->attach (which here equals mq_attach), each sub-qdisc allocated in 2.1 is assigned to a netdev queue (which was allocated in Part 1).
By now each netdev queue has a qdisc, but it is assigned only to the qdisc_sleeping field of netdev_queue; the active qdisc pointer is still parked at noop_qdisc. The grafting itself is done by dev_graft_qdisc, sketched below.
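A sketch of dev_graft_qdisc from net/sched/sch_generic.c of this kernel generation, showing how qdisc_sleeping is set while the active pointer stays on noop_qdisc:

```c
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock = qdisc_lock(oqdisc);

	spin_lock_bh(root_lock);

	/* Prune old scheduler */
	if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
		qdisc_reset(oqdisc);

	/* ... and graft new one: sleeping gets the real qdisc,
	 * the active pointer stays on noop_qdisc until dev_activate. */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
```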
At last, dev_activate calls transition_one_qdisc for each TX queue to set netdev_queue->qdisc from netdev_queue->qdisc_sleeping.
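A sketch of transition_one_qdisc from net/sched/sch_generic.c of this kernel generation:

```c
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	/* Publish the sleeping qdisc as the active one. */
	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}
```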
Note that inside attach_default_qdiscs, qdisc_create_dflt is invoked at two levels: once with mq_qdisc_ops to create the root mq qdisc, and then, from within mq_init, once per hardware queue with default_qdisc_ops.
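default_qdisc_ops itself is just a pointer to pfifo_fast_ops (from net/sched/sch_generic.c; in kernels that have it, the default can be changed at runtime through the net.core.default_qdisc sysctl):

```c
/* The per-queue default used by mq_init and attach_one_default_qdisc. */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);
```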
Data Structure

struct net_device and struct netdev_queue:
```c
struct net_device {
	...
	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;

	unsigned int		num_tx_queues;
	...
};
```
```c
struct netdev_queue {
	...
	struct net_device	*dev;
	struct Qdisc		*qdisc;
	struct Qdisc		*qdisc_sleeping;
	...
	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
	...
	unsigned long		state;
	...
};
```
struct mq_sched (the mq qdisc's private field):

```c
struct mq_sched {
	struct Qdisc	**qdiscs;
};
```
Call trace

Part 1:

```
> igb_probe
> > alloc_etherdev_mq
> > > alloc_etherdev_mqs
> > > > alloc_netdev_mqs
> > > > > ether_setup
> > > > > netif_alloc_netdev_queues
> > > > > > netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
> > > > > > > netdev_init_one_queue
> > register_netdev
> > > register_netdevice
> > > > dev_init_scheduler
> > > > > netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
> > > > > > dev_init_scheduler_queue
```
NOTE: if RPS (the feature contributed by Google) is disabled, there is no software queue for received packets; packets are held only in the driver's own igb_ring structures.
Part 2:

```
> dev_open
> > __dev_open
> > > dev_activate
> > > > attach_default_qdiscs(dev);
> > > > > qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
> > > > > > qdisc_alloc
> > > > > > ops->init (equal: mq_init)
> > > > > > > for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
> > > > > > > > qdisc_create_dflt(dev_queue, default_qdisc_ops ...
> > > > > > > > > qdisc_alloc
> > > > > > > > > ops->init
> > > > > qdisc->ops->attach
> > > > > dev_graft_qdisc
> > > > netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
> > > > > transition_one_qdisc
```
Functions in Part 1.

```c
static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	...
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;
	...
	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;
	...
}
```
```c
#define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
```
```c
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
				      unsigned int rxqs)
{
	return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
```
```c
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_RPS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure alignment of the private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure alignment of the whole construct */
	alloc_size += NETDEV_ALIGN - 1;

	p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!p)
		p = vzalloc(alloc_size);
	if (!p)
		return NULL;

	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_dev;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->close_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->adj_list.upper);
	INIT_LIST_HEAD(&dev->adj_list.lower);
	INIT_LIST_HEAD(&dev->all_adj_list.upper);
	INIT_LIST_HEAD(&dev->all_adj_list.lower);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_RPS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	strcpy(dev->name, name);
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

free_dev:
	netdev_freemem(dev);
	return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
```
```c
static int netif_alloc_netdev_queues(struct net_device *dev)
{
	unsigned int count = dev->num_tx_queues;
	struct netdev_queue *tx;
	size_t sz = count * sizeof(*tx);

	BUG_ON(count < 1 || count > 0xffff);

	tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!tx) {
		tx = vzalloc(sz);
		if (!tx)
			return -ENOMEM;
	}
	dev->_tx = tx;

	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);

	return 0;
}
```
```c
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue, void *_unused)
{
	/* Initialize queue lock */
	spin_lock_init(&queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
	queue->xmit_lock_owner = -1;
	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
	queue->dev = dev;
#ifdef CONFIG_BQL
	dql_init(&queue->dql, HZ);
#endif
}
```
```c
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
```
```c
int register_netdevice(struct net_device *dev)
{
	...
	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);
	...
}
```
```c
void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}
```
```c
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}
```
```c
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
```
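To see why noop_qdisc is a safe default while the device is not yet active: its enqueue simply drops the packet and its dequeue yields nothing. From net/sched/sch_generic.c of the same kernel generation:

```c
/* noop_qdisc's handlers: a queue that still points at noop_qdisc
 * silently discards everything handed to it and never transmits. */
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}
```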
Functions for Part 2.

```c
void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	if (dev->qdisc == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(dev_activate);
```
```c
static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		atomic_inc(&dev->qdisc->refcnt);
	} else {
		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
		if (qdisc) {
			qdisc->ops->attach(qdisc);
			dev->qdisc = qdisc;
		}
	}
}
```
```c
static const struct Qdisc_class_ops mq_class_ops = {
	.select_queue	= mq_select_queue,
	.graft		= mq_graft,
	.leaf		= mq_leaf,
	.get		= mq_get,
	.put		= mq_put,
	.walk		= mq_walk,
	.dump		= mq_dump_class,
	.dump_stats	= mq_dump_class_stats,
};

struct Qdisc_ops mq_qdisc_ops __read_mostly = {
	.cl_ops		= &mq_class_ops,
	.id		= "mq",
	.priv_size	= sizeof(struct mq_sched),
	.init		= mq_init,
	.destroy	= mq_destroy,
	.attach		= mq_attach,
	.dump		= mq_dump,
	.owner		= THIS_MODULE,
};
```
```c
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	if (!try_module_get(ops->owner))
		goto errout;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
```
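As a quick sanity check (assuming a multiqueue NIC named eth0; the exact priomap values depend on the kernel and tc versions), the resulting hierarchy can be inspected with tc, and the output will look roughly like:

```
$ tc qdisc show dev eth0
qdisc mq 0: root
qdisc pfifo_fast 0: parent :1 bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
qdisc pfifo_fast 0: parent :2 bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
...
```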