/*
 * bootmem_init - set up the boot-time allocator from boot_mem_map.
 *
 * Walks boot_mem_map three times: first to find the PFN bounds and a
 * place for the bootmem bitmap, then to register active ranges on
 * node 0, and finally to hand the usable low-memory pages to the
 * bootmem allocator. Ends by reserving the bitmap itself and calling
 * finalize_initrd().
 */
static void __init bootmem_init(void)
{
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
int i;
 
/*
* Init any data related to initrd. It's a nop if INITRD is
* not selected. Once that is done we can determine the low bound
* of usable memory: the larger of the value returned by
* init_initrd() and the first page frame at or above _end.
*/
reserved_end = max(init_initrd(),
  (unsigned long) PFN_UP(__pa_symbol(&_end)));
 
/*
* max_low_pfn is not a number of pages. The number of pages
* of the system is given by 'max_low_pfn - min_low_pfn'.
* Both start at their extreme values so the loop below can
* narrow them down.
*/
min_low_pfn = ~0UL;
max_low_pfn = 0;
 
/*
* Find the lowest and highest page frame numbers we have
* available, and pick mapstart, the PFN where the bootmem
* bitmap will be placed.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
 
if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
continue;
 
/* Only whole pages count: round the start up, the end down. */
start = PFN_UP(boot_mem_map.map[i].addr);
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
 
if (end > max_low_pfn)
max_low_pfn = end;
if (start < min_low_pfn)
min_low_pfn = start;
/* A region lying entirely below reserved_end cannot hold the bitmap. */
if (end <= reserved_end)
continue;
/* Keep the current candidate unless this region starts below it. */
if (start >= mapstart)
continue;
mapstart = max(reserved_end, start);
}
 
/* No RAM entry was found, or the bounds are inconsistent. */
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
/*
* Report any mismatch between the lowest RAM PFN and
* ARCH_PFN_OFFSET, then use ARCH_PFN_OFFSET as the low bound
* regardless.
*/
if (min_low_pfn > ARCH_PFN_OFFSET) {
pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
(min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
min_low_pfn - ARCH_PFN_OFFSET);
} else if (min_low_pfn < ARCH_PFN_OFFSET) {
pr_info("%lu free pages won't be used\n",
ARCH_PFN_OFFSET - min_low_pfn);
}
min_low_pfn = ARCH_PFN_OFFSET;
 
/*
* Determine low and high memory ranges. max_pfn keeps the
* unclamped top; max_low_pfn is clamped to HIGHMEM_START, and
* with CONFIG_HIGHMEM the part above it is recorded in
* highstart_pfn/highend_pfn.
*/
max_pfn = max_low_pfn;
if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) {
#ifdef CONFIG_HIGHMEM
highstart_pfn = PFN_DOWN(HIGHMEM_START);
highend_pfn = max_low_pfn;
#endif
max_low_pfn = PFN_DOWN(HIGHMEM_START);
}
 
/*
* Initialize the boot-time allocator with low memory only.
* The returned size is kept so that the bitmap itself can be
* reserved at the end of this function.
*/
bootmap_size = init_bootmem_node(NODE_DATA(0), mapstart,
min_low_pfn, max_low_pfn);
 
 
/*
* Register the ranges of boot_mem_map as active ranges on node 0.
* NOTE(review): unlike the other two loops, this one does not
* filter on BOOT_MEM_RAM - confirm that this is intended.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
 
start = PFN_UP(boot_mem_map.map[i].addr);
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
 
/* Never register anything below the low bound. */
if (start <= min_low_pfn)
start = min_low_pfn;
if (start >= end)
continue;
 
#ifndef CONFIG_HIGHMEM
/* Without highmem support, cut the range off at max_low_pfn. */
if (end > max_low_pfn)
end = max_low_pfn;
 
/*
* ... finally, is the area going away?
*/
if (end <= start)
continue;
#endif
 
add_active_range(0, start, end);
}
 
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end, size;
 
/*
* Only entries of type BOOT_MEM_RAM are handed to the allocator.
*/
if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
continue;
 
start = PFN_UP(boot_mem_map.map[i].addr);
end   = PFN_DOWN(boot_mem_map.map[i].addr
   + boot_mem_map.map[i].size);
/*
* The start of the usable range was rounded up and its end
* rounded down. Now clip it to [reserved_end, max_low_pfn).
*/
if (start >= max_low_pfn)
continue;
if (start < reserved_end)
start = reserved_end;
if (end > max_low_pfn)
end = max_low_pfn;
 
/*
* ... finally, is the area going away?
*/
if (end <= start)
continue;
size = end - start;
 
/* Register lowmem ranges (size is in pages, converted to bytes here) */
free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);
memory_present(0, start, end);
}
 
/*
* Reserve the bootmap memory, which was placed at mapstart.
*/
reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
 
/*
* Reserve initrd memory if needed.
*/
finalize_initrd();
}
 
 
 
/*
 * One range of page frames registered through add_active_range().
 * add_active_range() treats an entry as covering start_pfn up to
 * end_pfn and only merges entries whose nid matches.
 */
struct node_active_region {
unsigned long start_pfn;	/* first PFN of the range */
unsigned long end_pfn;		/* end PFN of the range */
int nid;			/* node ID the range resides on */
};
 
 
 
mm/page_alloc.c
===================
  /* Table of registered ranges; add_active_range() fills it, up to MAX_ACTIVE_REGIONS entries. */
  static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
 
 
 
/**
 * add_active_range - Record a PFN range that is backed by physical memory
 * @nid: ID of the node the range belongs to
 * @start_pfn: First PFN of the range
 * @end_pfn: End PFN of the range
 *
 * The range is first passed through mminit_validate_memmodel_limits(),
 * then stored in early_node_map[], from which free_area_init_nodes()
 * later derives zone sizes and holes. A range already covered by an
 * entry of the same node is ignored, and a range that overlaps or
 * touches such an entry widens that entry instead of taking a new slot.
 * If the table is full the range is dropped with a KERN_CRIT message.
 * When a range spans a memory hole, the architecture must make sure the
 * hole is never freed by the bootmem allocator.
 */
void __init add_active_range(unsigned int nid, unsigned long start_pfn,
				unsigned long end_pfn)
{
	int slot;

	mminit_dprintk(MMINIT_TRACE, "memory_register",
			"Entering add_active_range(%d, %#lx, %#lx) "
			"%d entries of %d used\n",
			nid, start_pfn, end_pfn,
			nr_nodemap_entries, MAX_ACTIVE_REGIONS);

	mminit_validate_memmodel_limits(&start_pfn, &end_pfn);

	/* Try to fold the new range into an entry of the same node */
	for (slot = 0; slot < nr_nodemap_entries; slot++) {
		unsigned long cur_start, cur_end;

		if (early_node_map[slot].nid != nid)
			continue;

		cur_start = early_node_map[slot].start_pfn;
		cur_end = early_node_map[slot].end_pfn;

		/* Nothing to do: the entry already contains the range */
		if (cur_start <= start_pfn && end_pfn <= cur_end)
			return;

		/* The range reaches past the entry's end: extend the end */
		if (end_pfn > cur_end && start_pfn <= cur_end) {
			early_node_map[slot].end_pfn = end_pfn;
			return;
		}

		/* The range begins before the entry: extend the start */
		if (cur_start > start_pfn && cur_start <= end_pfn) {
			early_node_map[slot].start_pfn = start_pfn;
			return;
		}
	}

	/* No merge happened; slot now indexes the first unused entry */
	if (slot >= MAX_ACTIVE_REGIONS) {
		printk(KERN_CRIT "More than %d memory regions, truncating\n",
				MAX_ACTIVE_REGIONS);
		return;
	}

	early_node_map[slot].start_pfn = start_pfn;
	early_node_map[slot].end_pfn = end_pfn;
	early_node_map[slot].nid = nid;
	nr_nodemap_entries = slot + 1;
}
 
 
 
 
 
arch/mips/mm/init.c
====================================
 
 
/*
 * paging_init - initialise the page tables and the zone limits.
 *
 * Calls pagetable_init() (plus the kmap initialisers), fills
 * max_zone_pfns[] with the upper PFN of every zone that is configured
 * in, and passes the array to free_area_init_nodes().
 */
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
/* Assigned below but never read in this function, hence __maybe_unused. */
unsigned long lastpfn __maybe_unused;
 
pagetable_init();
 
#ifdef CONFIG_HIGHMEM
kmap_init();
#endif
kmap_coherent_init();
 
/* Upper PFN limit of each zone; only configured zones get an entry. */
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
#endif
#ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
lastpfn = max_low_pfn;
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
lastpfn = highend_pfn;
 
/*
 * When cpu_has_dc_aliases is set and highmem is present, warn and
 * clamp ZONE_HIGHMEM to max_low_pfn so that no highmem is used.
 */
if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) {
printk(KERN_WARNING "This processor doesn't support highmem."
      " %ldk highmem ignored\n",
      (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10));
max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn;
lastpfn = max_low_pfn;
}
#endif
 
free_area_init_nodes(max_zone_pfns);
}
 
/*
 * kcore_list entry that only exists in CONFIG_64BIT builds; it is not
 * referenced anywhere in this excerpt.
 * NOTE(review): presumably describes the kseg0 segment - confirm.
 */
#ifdef CONFIG_64BIT
static struct kcore_list kcore_kseg0;
#endif
 
 
arch/mips/kernel/setup.c
================================
/*
 * setup_arch - architecture-specific boot setup.
 *
 * Probes the CPU, runs the firmware/platform hook prom_init(), selects
 * the console switch when CONFIG_VT is set, then calls arch_mem_init()
 * followed by resource_init() and plat_smp_setup().
 *
 * NOTE: the trailing "<=====" on two lines below appears to be the note
 * author's highlight marker for the calls of interest; it is not C.
 */
void __init setup_arch(char **cmdline_p)
{
cpu_probe();
prom_init();<=====
 
#ifdef CONFIG_EARLY_PRINTK
setup_early_printk();
#endif
cpu_report();
check_bugs_early();
 
/* Pick the console driver: VGA if configured, otherwise the dummy console. */
#if defined(CONFIG_VT)
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
#endif
 
arch_mem_init(cmdline_p);<=====
 
resource_init();
plat_smp_setup();
}

2011-09-28

neighbour

neighbour 学习笔记（kernel 3.0)

For ethernet, dev->header_ops is eth_header_ops

 936 static int __devinit e1000_probe(struct pci_dev *pdev,          
 937                                  const struct pci_device_id *ent)
 938  
...
 973         netdev = alloc_etherdev(sizeof(struct e1000_adapter));

include/linux/etherdevice.h

53 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
54 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)

net/ethernet/eth.c

365 struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
366                                       unsigned int rxqs)
367 {
368         return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
369 }

net/core/dev.c

5821 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5822                 void (*setup)(struct net_device *), 
5823                 unsigned int txqs, unsigned int rxqs)
...
5880         dev->priv_flags = IFF_XMIT_DST_RELEASE;
5881         setup(dev); <=== 
5882 
5883         dev->num_tx_queues = txqs;

net/ethernet/eth.c

334 void ether_setup(struct net_device *dev)
336         dev->header_ops         = &eth_header_ops;<===
337         dev->type               = ARPHRD_ETHER;
338         dev->hard_header_len    = ETH_HLEN;
339         dev->mtu                = ETH_DATA_LEN;
340         dev->addr_len           = ETH_ALEN;
341         dev->tx_queue_len       = 1000; /* Ethernet wants good queues */
342         dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
343         dev->priv_flags         = IFF_TX_SKB_SHARING;
344 
345         memset(dev->broadcast, 0xFF, ETH_ALEN);
346 
347 }

2011-04-17

route

linux kernel IPv6 route

IPv6 route tree 原理.
IPv6路由采用二叉树的形式进行存储，查找任意路由最多需要128次比较（128次，说法不太严格）。
因此其算法复杂度为常数，因此IPv6里没有像IPv4那样的cache。

2011-01-22

gcc

fix bug: timezone of toolchain

When we compile glibc (or eglibc), we also need to generate the timezone data files that go with it, although they are stable and almost never change between version updates.

Today I ran into a problem with this.

We use an old glibc's timezone files, which are shared by many different toolchains for several platforms.

Unfortunately, the data files were changed upstream (by GNU) after 2007, but I could not find the exact glibc version (or date) in which the timezone data changed.

btw: toolchain = binutils + gcc + glibc (eglibc) + kernel (headers)

2010-06-02

xfrm

pf_key module summary

###af_key.c
The Linux kernel provides 3 methods to manage SAs/SPs,
such as add/del/flush/dump of SAs/SPs.

pf_key socket.
netlink message.
socket option.

af_key.c implements the pf_key socket.

###part 1. pf_key socket definition of the socket operations.
The important functions are
pfkey_create, pfkey_sendmsg, pfkey_recvmsg,
pfkey_release, datagram_poll.

/*
 * Socket operation table for PF_KEY sockets. Only release, poll,
 * sendmsg and recvmsg have real handlers; every other operation is
 * routed to the corresponding sock_no_* stub.
 */
static const struct proto_ops pfkey_ops = { 
.family        =    PF_KEY, 
.owner        =    THIS_MODULE, 
/* Operations that make no sense on pfkey sockets. */ 
.bind        =    sock_no_bind, 
.connect    =    sock_no_connect, 
.socketpair    =    sock_no_socketpair, 
.accept        =    sock_no_accept, 
.getname    =    sock_no_getname, 
.ioctl        =    sock_no_ioctl, 
.listen        =    sock_no_listen, 
.shutdown    =    sock_no_shutdown, 
.setsockopt    =    sock_no_setsockopt, 
.getsockopt    =    sock_no_getsockopt, 
.mmap        =    sock_no_mmap, 
.sendpage    =    sock_no_sendpage, 

/* Now the operations that really occur. */ 
.release    =    pfkey_release, 
.poll        =    datagram_poll, 
.sendmsg    =    pfkey_sendmsg, 
.recvmsg    =    pfkey_recvmsg, 
};


/* Protocol-family record for PF_KEY; .create points at pfkey_create. */
static struct net_proto_family pfkey_family_ops = { 
.family    =    PF_KEY, 
.create    =    pfkey_create, 
.owner    =    THIS_MODULE, 
};


/*
 * Per-socket state of a PF_KEY socket. It embeds struct sock as its
 * first member and adds two flags plus the state of a dump.
 */
struct pfkey_sock { 
/* struct sock must be the first member of struct pfkey_sock */ 
struct sock    sk; 
/* NOTE(review): presumably set by the SADB_REGISTER handler - confirm */
int        registered; 
/* NOTE(review): presumably set by the SADB_X_PROMISC handler - confirm */
int        promisc; 

/* State of a dump; the union holds either a policy walk or a state walk. */
struct { 
uint8_t        msg_version; 
uint32_t    msg_pid; 
int        (*dump)(struct pfkey_sock *sk); 
void        (*done)(struct pfkey_sock *sk); 
union { 
struct xfrm_policy_walk    policy; 
struct xfrm_state_walk    state; 
} u; 
struct sk_buff    *skb; 
} dump; 
};

###part 2. pf_key kernel message

/*
 * xfrm_mgr instance named "pfkeyv2": the callback table whose entries
 * all point at pfkey_* functions from af_key.c.
 */
static struct xfrm_mgr pfkeyv2_mgr =
{ 
.id        = "pfkeyv2", 
.notify        = pfkey_send_notify, 
.acquire    = pfkey_send_acquire, 
.compile_policy    = pfkey_compile_policy, 
.new_mapping    = pfkey_send_new_mapping, 
.notify_policy    = pfkey_send_policy_notify, 
.migrate    = pfkey_send_migrate, 
};

pf_key message process.

In kernel 3.0, the pf_key message format is
a traditional TLV format.

header + (extension_header + extension_value) * n

The header is sadb_msg.
The extension header is sadb_ext.
The extension value differs according to the extension header,
such as sadb_sa, sadb_x_policy and so on.

/*
 * Base header of every pf_key message (packed).
 * Field sizes: 4 x 1 byte + 2 x 2 bytes + 2 x 4 bytes = 16 bytes.
 */
struct sadb_msg { 
uint8_t        sadb_msg_version; 
uint8_t        sadb_msg_type;	/* selects the handler in pfkey_funcs[] */
uint8_t        sadb_msg_errno; 
uint8_t        sadb_msg_satype; 
uint16_t    sadb_msg_len; 
uint16_t    sadb_msg_reserved; 
uint32_t    sadb_msg_seq; 
uint32_t    sadb_msg_pid; 
} __attribute__((packed)); 
/* sizeof(struct sadb_msg) == 16 */ 

/* Generic extension header (packed): a length and a type, 2 bytes each. */
struct sadb_ext { 
uint16_t    sadb_ext_len; 
uint16_t    sadb_ext_type; 
} __attribute__((packed)); 
/* sizeof(struct sadb_ext) == 4 */ 


/*
 * SA extension (packed). Its first two 16-bit fields are a length and
 * an extension type, matching the layout of struct sadb_ext.
 */
struct sadb_sa { 
uint16_t    sadb_sa_len; 
uint16_t    sadb_sa_exttype; 
__be32        sadb_sa_spi;	/* declared big-endian (__be32) */
uint8_t        sadb_sa_replay; 
uint8_t        sadb_sa_state; 
uint8_t        sadb_sa_auth; 
uint8_t        sadb_sa_encrypt; 
uint32_t    sadb_sa_flags; 
} __attribute__((packed)); 
/* sizeof(struct sadb_sa) == 16 */

/*
 * Policy extension (packed). Its first two 16-bit fields are a length
 * and an extension type, matching the layout of struct sadb_ext.
 */
struct sadb_x_policy { 
uint16_t    sadb_x_policy_len; 
uint16_t    sadb_x_policy_exttype; 
uint16_t    sadb_x_policy_type; 
uint8_t        sadb_x_policy_dir; 
uint8_t        sadb_x_policy_reserved; 
uint32_t    sadb_x_policy_id; 
uint32_t    sadb_x_policy_priority; 
} __attribute__((packed)); 
/* sizeof(struct sadb_x_policy) == 16 */

An application program (such as setkey) sends a command to the kernel through the sendmsg system call.
In the kernel, pf_key then calls pfkey_sendmsg.
pfkey_sendmsg calls pfkey_get_base_msg to do some simple checks, and
then calls pfkey_process.

pfkey_process first calls pfkey_broadcast, then splits the extension messages
into a pointer array, one entry per extension.
void *ext_hdrs[SADB_EXT_MAX];
SADB_EXT_SA --->
SADB_EXT_ADDRESS_SRC --->
SADB_EXT_ADDRESS_DST --->
this pointer array will be used by the following handler.

It then calls the pfkey_handler selected by the sadb_msg_type in the pf_key message header.

typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs);

/* Signature shared by all pf_key message handlers. */
typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs); 
/*
 * Dispatch table indexed by message type (SADB_*). Several types share
 * a handler: UPDATE and ADD use pfkey_add; SPDUPDATE, SPDADD and
 * SPDSETIDX use pfkey_spdadd; SPDGET and SPDDELETE2 use pfkey_spdget.
 * SADB_EXPIRE, SADB_X_PCHANGE and SADB_X_SPDACQUIRE have no handler (NULL).
 */
static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { 
[SADB_RESERVED]        = pfkey_reserved, 
[SADB_GETSPI]        = pfkey_getspi, 
[SADB_UPDATE]        = pfkey_add, 
[SADB_ADD]        = pfkey_add, 
[SADB_DELETE]        = pfkey_delete, 
[SADB_GET]        = pfkey_get, 
[SADB_ACQUIRE]        = pfkey_acquire, 
[SADB_REGISTER]        = pfkey_register, 
[SADB_EXPIRE]        = NULL, 
[SADB_FLUSH]        = pfkey_flush, 
[SADB_DUMP]        = pfkey_dump, 
[SADB_X_PROMISC]    = pfkey_promisc, 
[SADB_X_PCHANGE]    = NULL, 
[SADB_X_SPDUPDATE]    = pfkey_spdadd, 
[SADB_X_SPDADD]        = pfkey_spdadd, 
[SADB_X_SPDDELETE]    = pfkey_spddelete, 
[SADB_X_SPDGET]        = pfkey_spdget, 
[SADB_X_SPDACQUIRE]    = NULL, 
[SADB_X_SPDDUMP]    = pfkey_spddump, 
[SADB_X_SPDFLUSH]    = pfkey_spdflush, 
[SADB_X_SPDSETIDX]    = pfkey_spdadd, 
[SADB_X_SPDDELETE2]    = pfkey_spdget, 
[SADB_X_MIGRATE]    = pfkey_migrate, 
};

The policy-related functions are implemented in xfrm_policy.c and xfrm_state.c.
3.1 policy add handler: pfkey_spdadd

3.2 policy dump handler: pfkey_spddump
function pfkey_xfrm_policy2msg

3.3 policy flush handler: pfkey_spdflush.

3.4 SA add handler:pfkey_add

3.5 SA dump handler: pfkey_dump

3.6 SA flush handler: pfkey_flush

2009-05-13

xfrm

xfrm in kernel

global var and structure:

/* NOTE(review): by its name, the per-net hash table of xfrm states keyed by SPI - confirm. */
static DEFINE_PER_NET(struct hlist_head *, xfrm_state_byspi);
/* One slot per address family, e.g. AF_INET (IPv4) and AF_INET6 (IPv6). */
static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
xfrm_policy_afinfo 定义一个大的数组，每一个元素对应一个地址族，如ipv4（AF_INET），ipv6(AF_INET6).

2007-12-11

others

嵌入Linux下的usb storage的支持

目标：在mipsel架构的嵌入式linux系统上支持USB盘的读写。
思路：使用可加载模块的形式增加 scsimod.ko, sdmod.ko, usbstorage.ko, fat.ko, vfat.ko.

##具体实现：
###步骤I 准备必须的驱动（可加载模块）

scsimod.ko 源代码目录下的 driver/scsi目录下的文件编译
sdmod.ko, 源代码目录下的 driver/scsi目录下的文件编译
usbstorage.ko 源代码目录下的 driver/usb/storage 目录下的文件编译
fat.ko 源代码目录下的 fs/fat目录下的文件编译
vfat.ko 源代码目录下的 fs/vfat目录下的文件编译
上述五个可加载模块的编译过程可以具体参考各自目录下的Makefile。