Libnids中TCP相关实现

变量

nids_tcp_timeouts

双向链表nids_tcp_timeouts按结点的timeout从小到大排列。

tcp_latest, tcp_oldest

static struct tcp_stream *tcp_latest = 0, *tcp_oldest = 0;

tcp_latest指向最新的TCP连接,tcp_oldest指向最老的连接,所有TCP连接组成一个双向链表,前驱是更新的连接,后继是更旧的连接。

tcp_stream_table

static struct tcp_stream **tcp_stream_table;

使用哈希表tcp_stream_table保存所有跟踪的TCP连接,发生碰撞的连接通过双向链表保存。

streams_pool, free_streams

1
2
static struct tcp_stream *streams_pool;
static struct tcp_stream *free_streams;

streams_pool保存为流分配的内存的首地址,free_streams指向下一个可使用的流空间。

tcp_procs

1
struct proc_node *tcp_procs;

为处理TCP连接注册的所有回调函数。

结构体

skbuff

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* One buffered TCP segment, held in the per-direction out-of-order
 * queue (half_stream.list) until its data becomes contiguous with the
 * acknowledged stream. The queue is a doubly linked list ordered by
 * sequence number (see tcp_queue). */
struct skbuff
{
struct skbuff *next;    /* next segment in the queue */
struct skbuff *prev;    /* previous segment in the queue */

void *data;             /* private copy of the segment payload */
u_int len;              /* payload length in bytes */
u_int truesize;         /* accounted size (skblen) charged to rmem_alloc */
u_int urg_ptr;          /* urgent pointer taken from the TCP header */

char fin;               /* nonzero: FIN flag was set on this segment */
char urg;               /* nonzero: URG flag was set on this segment */
u_int seq;              /* sequence number of the first payload byte */
u_int ack;              /* acknowledgment number (not used in this file) */
};

tcp_stream

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* One tracked TCP connection. Streams live simultaneously on three
 * linked structures: a hash-bucket chain (next_node/prev_node), a
 * global newest-to-oldest time list (next_time/prev_time), and — when
 * released — the free-pool list (next_free). */
struct tcp_stream
{
struct tuple4 addr;             /* the connection's address/port 4-tuple */
char nids_state;                /* libnids state (NIDS_JUST_EST, NIDS_DATA, ...) */
struct lurker_node *listeners;  /* callbacks subscribed to this connection */
struct half_stream client;      /* the side that actively opened the connection */
struct half_stream server;      /* the passively opened side */
struct tcp_stream *next_node;   /* hash-bucket chain: successor */
struct tcp_stream *prev_node;   /* hash-bucket chain: predecessor */
int hash_index;                 /* slot index in tcp_stream_table */
struct tcp_stream *next_time;   /* next (older) connection in time order */
struct tcp_stream *prev_time;   /* previous (newer) connection in time order */
int read;                       /* bytes consumed by callbacks this round (see notify/nids_discard) */
struct tcp_stream *next_free;   /* next free slot when the stream sits in the pool */
void *user;                     /* opaque user data attached to the stream */
};

half_stream

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* State of one direction of a TCP connection (data received BY this
 * half; "rcv" in most functions below). */
struct half_stream
{
char state;             /* TCP state (TCP_SYN_SENT, FIN_SENT, ...) */
char collect;           /* >0: store normal payload for callbacks */
char collect_urg;       /* >0: deliver urgent data to callbacks */

char *data;             /* reassembled-data buffer (grown by add2buf) */
int offset;             /* stream offset of the first byte kept in data */
int count;              /* total payload bytes received so far */
int count_new;          /* bytes added in the most recent update */
int bufsize;            /* allocated size of data */
int rmem_alloc;         /* bytes accounted for queued (out-of-order) segments */

int urg_count;          /* number of urgent bytes delivered */
u_int acked;            /* (not referenced in this file) */
u_int seq;              /* next sequence this side will send (ISN+1 at setup) */
u_int ack_seq;          /* latest acknowledgment number sent by this side */
u_int first_data_seq;   /* sequence number where payload starts */
u_char urgdata;         /* the single byte of urgent data (see add_from_skb) */
u_char count_new_urg;   /* nonzero: a new urgent byte awaits delivery */
u_char urg_seen;        /* an urgent pointer is currently pending */
u_int urg_ptr;          /* absolute urgent pointer */
u_short window;         /* last advertised receive window */
u_char ts_on;           /* TCP timestamp option negotiated */
u_char wscale_on;       /* window-scale option negotiated */
u_int curr_ts;          /* most recent timestamp value (used for PAWS) */
u_int wscale;           /* window scale factor (1 when the option is off) */
struct skbuff *list;    /* out-of-order segment queue, ordered by seq */
struct skbuff *listtail;        /* tail of the segment queue */
};

lurker_node

1
2
3
4
5
6
7
struct lurker_node              // 潜伏结点,用于处理TCP连接
{
void (*item)(); // 回调函数地址
void *data; // 传给回调函数的数据地址
char whatto; // (紧急)数据达到client或者server
struct lurker_node *next;
};

函数

tcp_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
int tcp_init(int size)
{
int i;
struct tcp_timeout *tmp;

if (!size) return 0;
tcp_stream_table_size = size;
tcp_stream_table = calloc(tcp_stream_table_size, sizeof(char *));
if (!tcp_stream_table)
{
nids_params.no_mem("tcp_init");
return -1;
}
max_stream = 3 * tcp_stream_table_size / 4;
streams_pool = (struct tcp_stream *) malloc((max_stream + 1) * sizeof(struct tcp_stream));
if (!streams_pool)
{
nids_params.no_mem("tcp_init");
return -1;
}
for (i = 0; i < max_stream; i++)
streams_pool[i].next_free = &(streams_pool[i + 1]);
streams_pool[max_stream].next_free = 0;
free_streams = streams_pool;
init_hash();
while (nids_tcp_timeouts)
{
tmp = nids_tcp_timeouts->next;
free(nids_tcp_timeouts);
nids_tcp_timeouts = tmp;
}
return 0;
}

TCP的相关初始化,为哈希表分配tcp_stream_table_size大小的空间,默认是1040,可跟踪的最多连接是max_stream = 3 * tcp_stream_table_size / 4个,然后为连接池分配max_stream + 1个tcp_stream的空间,接下来将这些元素通过next_free连接起来,next_free表示下一个可用的结点,全局变量free_streams指向刚刚分配的空间,然后初始化哈希,下一个循环处理处于超时关闭状态的TCP连接,删除这些连接并释放内存。

add_tcp_closing_timeout

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* Schedule a_tcp for forced release ~10 seconds after the current
 * capture timestamp. Inserts a node into nids_tcp_timeouts, which is
 * kept sorted by ascending timeout; if the stream is already on the
 * list the new node is discarded. No-op unless tcp_workarounds is set. */
static void add_tcp_closing_timeout(struct tcp_stream *a_tcp)
{
    struct tcp_timeout *to;
    struct tcp_timeout *newto;

    if (!nids_params.tcp_workarounds)
        return;
    newto = malloc(sizeof (struct tcp_timeout));
    if (!newto)
    {
        /* BUG FIX: no_mem() may return (tcp_init returns -1 after calling
         * it), and the original code went on to dereference NULL here. */
        nids_params.no_mem("add_tcp_closing_timeout");
        return;
    }
    newto->a_tcp = a_tcp;
    newto->timeout.tv_sec = nids_last_pcap_header->ts.tv_sec + 10;
    newto->prev = 0;
    /* Walk the sorted list: stop before the first node that expires
     * later than ours; bail out if the stream is already scheduled. */
    for (newto->next = to = nids_tcp_timeouts; to; newto->next = to = to->next)
    {
        if (to->a_tcp == a_tcp)
        {
            free(newto);
            return;
        }
        if (to->timeout.tv_sec > newto->timeout.tv_sec)
            break;
        newto->prev = to;
    }
    /* Splice newto between newto->prev and newto->next. */
    if (!newto->prev)
        nids_tcp_timeouts = newto;
    else
        newto->prev->next = newto;
    if (newto->next)
        newto->next->prev = newto;
}

a_tcp添加到双向链表nids_tcp_timeouts中(如果不存在)并使其依然有序。

del_tcp_closing_timeout

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Remove a_tcp's entry from the closing-timeout list, if it has one,
 * and release the node. No-op unless tcp_workarounds is enabled. */
static void del_tcp_closing_timeout(struct tcp_stream *a_tcp)
{
    struct tcp_timeout *node;

    if (!nids_params.tcp_workarounds)
        return;
    node = nids_tcp_timeouts;
    while (node && node->a_tcp != a_tcp)
        node = node->next;
    if (!node)
        return;         /* this stream was never scheduled */
    /* Unlink from the doubly linked list, fixing up the list head
     * when the node is first. */
    if (node->prev)
        node->prev->next = node->next;
    else
        nids_tcp_timeouts = node->next;
    if (node->next)
        node->next->prev = node->prev;
    free(node);
}

a_tcp对应的结点从nids_tcp_timeouts链表中删除并释放内存。

nids_free_tcp_stream

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/* Release every resource held by a_tcp: its pending timeout, both
 * out-of-order queues, both reassembly buffers and all listener nodes;
 * unlink it from its hash bucket and from the global time-ordered
 * list, then return the stream object to the free pool. */
void nids_free_tcp_stream(struct tcp_stream *a_tcp)
{
int hash_index = a_tcp->hash_index;
struct lurker_node *i, *j;

del_tcp_closing_timeout(a_tcp);
purge_queue(&a_tcp->server);
purge_queue(&a_tcp->client);

/* Unlink from the hash-bucket chain; when there is no predecessor,
 * the bucket head itself must be updated. */
if (a_tcp->next_node)
a_tcp->next_node->prev_node = a_tcp->prev_node;
if (a_tcp->prev_node)
a_tcp->prev_node->next_node = a_tcp->next_node;
else
tcp_stream_table[hash_index] = a_tcp->next_node;
if (a_tcp->client.data)
free(a_tcp->client.data);
if (a_tcp->server.data)
free(a_tcp->server.data);
/* Unlink from the newest-to-oldest time list, fixing tcp_oldest /
 * tcp_latest when this stream was at either end. */
if (a_tcp->next_time)
a_tcp->next_time->prev_time = a_tcp->prev_time;
if (a_tcp->prev_time)
a_tcp->prev_time->next_time = a_tcp->next_time;
if (a_tcp == tcp_oldest)
tcp_oldest = a_tcp->prev_time;
if (a_tcp == tcp_latest)
tcp_latest = a_tcp->next_time;

/* Free all listener nodes. */
i = a_tcp->listeners;

while (i)
{
j = i->next;
free(i);
i = j;
}
/* Return the stream object to the head of the free pool. */
a_tcp->next_free = free_streams;
free_streams = a_tcp;
tcp_num--;
}

a_tcp从哈希表中删除,释放占用内存,调整前驱后继,并使可用的流空间指向该地址。

tcp_check_timeouts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Expire every stream whose closing timeout has passed: mark it
 * NIDS_TIMED_OUT, give each listener a final callback, then free it.
 * The list is sorted by ascending timeout, so the scan can stop at
 * the first entry that is still in the future. */
void tcp_check_timeouts(struct timeval *now)
{
struct tcp_timeout *to;
struct tcp_timeout *next;
struct lurker_node *i;

for (to = nids_tcp_timeouts; to; to = next)
{
if (now->tv_sec < to->timeout.tv_sec)
return;
to->a_tcp->nids_state = NIDS_TIMED_OUT;
for (i = to->a_tcp->listeners; i; i = i->next)
(i->item)(to->a_tcp, &i->data);
/* Save the successor before freeing: nids_free_tcp_stream()
 * removes and frees this very timeout node. */
next = to->next;
nids_free_tcp_stream(to->a_tcp);
}
}

将所有超时的连接删除。

get_ts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Scan the TCP options of this_tcphdr for a timestamp option (kind 8)
 * and store its TSval (host byte order) in *ts.
 * Returns 1 if a timestamp was found, 0 otherwise.
 * The loop bound (len - sizeof(tcphdr) - 10) guarantees that a full
 * 10-byte timestamp option still fits at index ind. */
static int get_ts(struct tcphdr *this_tcphdr, unsigned int *ts)
{
int len = 4 * this_tcphdr->th_off;
unsigned int tmp_ts;
unsigned char *options = (unsigned char*)(this_tcphdr + 1);
int ind = 0, ret = 0;
while (ind <= len - (int)sizeof (struct tcphdr) - 10)
switch (options[ind])
{
case 0: /* TCPOPT_EOL: end of option list */
return ret;
case 1: /* TCPOPT_NOP: single-byte padding */
ind++;
continue;
case 8: /* TCPOPT_TIMESTAMP: TSval starts 2 bytes in */
memcpy((char*)&tmp_ts, options + ind + 2, 4);
*ts = ntohl(tmp_ts);
ret = 1;
/* no break, intentionally: fall through to advance past the option */
default:
if (options[ind + 1] < 2) /* "silly option": bad length would loop forever */
return ret;
ind += options[ind + 1];
}

return ret;
}

遍历TCP头部的选项,获取时间戳。看一下TCP头部选项的格式就能理解。

get_wscale

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* Scan the TCP options for a window-scale option (kind 3) and store
 * the resulting multiplier (1 << shift, shift clamped to 14) in *ws.
 * *ws defaults to 1 (no scaling). Returns 1 if the option was found,
 * 0 otherwise. Loop bound keeps the 3-byte option within the header. */
static int get_wscale(struct tcphdr *this_tcphdr, unsigned int *ws)
{
int len = 4 * this_tcphdr->th_off;
unsigned int tmp_ws;
unsigned char *options = (unsigned char*)(this_tcphdr + 1);
int ind = 0, ret = 0;
*ws = 1;
while (ind <= len - (int)sizeof (struct tcphdr) - 3)
switch (options[ind])
{
case 0: /* TCPOPT_EOL: end of option list */
return ret;
case 1: /* TCPOPT_NOP: single-byte padding */
ind++;
continue;
case 3: /* TCPOPT_WSCALE: shift count is in the third byte */
tmp_ws = options[ind + 2];
if (tmp_ws > 14)
tmp_ws = 14; /* RFC limit on the shift count */
*ws = 1 << tmp_ws;
ret = 1;
/* no break, intentionally: fall through to advance past the option */
default:
if (options[ind + 1] < 2) /* "silly option": bad length would loop forever */
return ret;
ind += options[ind + 1];
}

return ret;
}

获取窗口的扩大因子。

add_new_tcp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/* Create a tracking entry for a new connection whose initial SYN is
 * this_tcphdr/this_iphdr. When the tracked-stream limit is exceeded,
 * the oldest connection is evicted first (with a NIDS_TIMED_OUT
 * notification to its listeners). The new stream is inserted at the
 * head of its hash bucket and at the "latest" end of the time list. */
static void add_new_tcp(struct tcphdr *this_tcphdr, struct ip *this_iphdr)
{
struct tcp_stream *tolink;
struct tcp_stream *a_tcp;
int hash_index;
struct tuple4 addr;

addr.source = ntohs(this_tcphdr->th_sport);
addr.dest = ntohs(this_tcphdr->th_dport);
addr.saddr = this_iphdr->ip_src.s_addr;
addr.daddr = this_iphdr->ip_dst.s_addr;
hash_index = mk_hash_index(addr);

/* Over the limit: evict the oldest stream to make room. */
if (tcp_num > max_stream)
{
struct lurker_node *i;
int orig_client_state = tcp_oldest->client.state;
tcp_oldest->nids_state = NIDS_TIMED_OUT;
for (i = tcp_oldest->listeners; i; i = i->next)
(i->item)(tcp_oldest, &i->data);
nids_free_tcp_stream(tcp_oldest);
/* Only warn when a connection past the handshake was dropped. */
if (orig_client_state != TCP_SYN_SENT)
nids_params.syslog(NIDS_WARN_TCP, NIDS_WARN_TCP_TOOMUCH, ugly_iphdr, this_tcphdr);
}
/* Take a stream object from the free pool; the eviction above should
 * guarantee one is available — if not, stop for debugging. */
a_tcp = free_streams;
if (!a_tcp)
{
fprintf(stderr, "gdb me ...\n");
pause();
}
free_streams = a_tcp->next_free;

tcp_num++;
tolink = tcp_stream_table[hash_index];
memset(a_tcp, 0, sizeof(struct tcp_stream));
a_tcp->hash_index = hash_index;
a_tcp->addr = addr;
/* Client state after its SYN; expected next seq is ISN + 1. */
a_tcp->client.state = TCP_SYN_SENT;
a_tcp->client.seq = ntohl(this_tcphdr->th_seq) + 1;
a_tcp->client.first_data_seq = a_tcp->client.seq;
a_tcp->client.window = ntohs(this_tcphdr->th_win);
a_tcp->client.ts_on = get_ts(this_tcphdr, &a_tcp->client.curr_ts);
a_tcp->client.wscale_on = get_wscale(this_tcphdr, &a_tcp->client.wscale);
a_tcp->server.state = TCP_CLOSE;
/* Insert at the head of the hash bucket... */
a_tcp->next_node = tolink;
a_tcp->prev_node = 0;
if (tolink)
tolink->prev_node = a_tcp;
tcp_stream_table[hash_index] = a_tcp;
/* ...and at the "latest" end of the time-ordered list. */
a_tcp->next_time = tcp_latest;
a_tcp->prev_time = 0;
if (!tcp_oldest)
tcp_oldest = a_tcp;
if (tcp_latest)
tcp_latest->prev_time = a_tcp;
tcp_latest = a_tcp;
}

添加新的TCP连接,如果连接个数超过最大值,删除最老的连接。

然后在可用的内存池free_streams里分配一个tcp_stream空间给a_tcp,并调整可用内存池的首地址。接下来初始化a_tcp,第一个报文段的序列号是初始序列号this_tcphdr->th_seq加1。如果哈希表相应位置已存在TCP连接,则将a_tcp插到链表头部。然后调整最新的TCP连接和最老的TCP连接。

add2buf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* Append datalen bytes to rcv's reassembly buffer and update the byte
 * counters. The buffer grows geometrically: first allocation is 4 KiB
 * (or 2*datalen for large writes); later growth doubles the buffer or
 * adds 2*datalen, whichever the incoming size requires.
 * On allocation failure this reports via no_mem() and returns, keeping
 * the existing buffer intact and dropping the new data. */
static void add2buf(struct half_stream *rcv, char *data, int datalen)
{
    int toalloc;

    if (datalen + rcv->count - rcv->offset > rcv->bufsize)
    {
        if (!rcv->data)
        {
            if (datalen < 2048)
                toalloc = 4096;
            else
                toalloc = datalen * 2;
            rcv->data = malloc(toalloc);
            if (!rcv->data)
            {
                /* BUG FIX: no_mem() may return; the original fell through
                 * to memcpy() on a NULL buffer. */
                nids_params.no_mem("add2buf");
                return;
            }
            rcv->bufsize = toalloc;
        }
        else
        {
            char *grown;

            if (datalen < rcv->bufsize)
                toalloc = 2 * rcv->bufsize;
            else
                toalloc = rcv->bufsize + 2 * datalen;
            /* BUG FIX: assigning realloc()'s result directly to rcv->data
             * leaked the old buffer on failure; keep it valid instead. */
            grown = realloc(rcv->data, toalloc);
            if (!grown)
            {
                nids_params.no_mem("add2buf");
                return;
            }
            rcv->data = grown;
            rcv->bufsize = toalloc;
        }
    }
    memcpy(rcv->data + rcv->count - rcv->offset, data, datalen);
    rcv->count_new = datalen;
    rcv->count += datalen;
}

将本次收到的数据加入缓冲区,如果会造成溢出,根据缓冲区已有数据和本次收到数据大小,以2的幂方式增长缓冲区。

ride_lurkers

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Invoke every listener whose whatto bits intersect mask, then update
 * each listener's whatto according to how its callback changed the
 * stream's collect / collect_urg flags: a raised flag subscribes the
 * listener to that event, a lowered flag unsubscribes it. */
static void ride_lurkers(struct tcp_stream *a_tcp, char mask)
{
    struct lurker_node *node;

    for (node = a_tcp->listeners; node; node = node->next)
    {
        char old_cc, old_sc, old_ccu, old_scu;

        if (!(node->whatto & mask))
            continue;

        /* Snapshot the collect flags so changes made by the callback
         * can be detected afterwards. */
        old_cc = a_tcp->client.collect;
        old_sc = a_tcp->server.collect;
        old_ccu = a_tcp->client.collect_urg;
        old_scu = a_tcp->server.collect_urg;

        (node->item)(a_tcp, &node->data);

        if (a_tcp->client.collect > old_cc)
            node->whatto |= COLLECT_cc;
        else if (a_tcp->client.collect < old_cc)
            node->whatto &= ~COLLECT_cc;

        if (a_tcp->client.collect_urg > old_ccu)
            node->whatto |= COLLECT_ccu;
        else if (a_tcp->client.collect_urg < old_ccu)
            node->whatto &= ~COLLECT_ccu;

        if (a_tcp->server.collect > old_sc)
            node->whatto |= COLLECT_sc;
        else if (a_tcp->server.collect < old_sc)
            node->whatto &= ~COLLECT_sc;

        if (a_tcp->server.collect_urg > old_scu)
            node->whatto |= COLLECT_scu;
        else if (a_tcp->server.collect_urg < old_scu)
            node->whatto &= ~COLLECT_scu;
    }
}

根据潜伏结点的whatto与mask调用回调函数,并调整whatto值。

notify

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/* Deliver newly arrived data on rcv to the connection's listeners.
 * Urgent data is delivered once via the COLLECT_*u mask; normal data
 * is delivered repeatedly (when one_loop_less is set) until the single
 * callback consumes nothing or no new data remains. Afterwards,
 * listeners that no longer want any events are pruned and freed. */
static void notify(struct tcp_stream *a_tcp, struct half_stream *rcv)
{
struct lurker_node *i, **prev_addr;
char mask;

if (rcv->count_new_urg)
{
/* Urgent-data path: deliver only if someone collects urgent data. */
if (!rcv->collect_urg)
return;
if (rcv == &a_tcp->client)
mask = COLLECT_ccu;
else
mask = COLLECT_scu;
ride_lurkers(a_tcp, mask);
goto prune_listeners;
}
if (rcv->collect)
{
if (rcv == &a_tcp->client)
mask = COLLECT_cc;
else
mask = COLLECT_sc;
do
{
int total;
/* read = unread bytes in the buffer; callbacks may shrink it
 * via nids_discard() to consume only part of the data. */
a_tcp->read = rcv->count - rcv->offset;
total = a_tcp->read;

ride_lurkers(a_tcp, mask);
if (a_tcp->read > total - rcv->count_new)
rcv->count_new = total - a_tcp->read;

/* Drop the consumed prefix and advance the buffer offset. */
if (a_tcp->read > 0)
{
memmove(rcv->data, rcv->data + a_tcp->read, rcv->count - rcv->offset - a_tcp->read);
rcv->offset += a_tcp->read;
}
}
while (nids_params.one_loop_less && a_tcp->read > 0 && rcv->count_new);
// we know that if one_loop_less!=0, we have only one callback to notify
rcv->count_new = 0;
}
prune_listeners:
/* Remove listeners whose whatto dropped to zero (unsubscribed). */
prev_addr = &a_tcp->listeners;
i = a_tcp->listeners;
while (i)
if (!i->whatto)
{
*prev_addr = i->next;
free(i);
i = *prev_addr;
}
else
{
prev_addr = &i->next;
i = i->next;
}
}

如果要处理紧急数据,根据数据的方向设定mask并通知给a_tcp的潜伏结点,然后调整潜伏结点,删除所有忽略正常数据及紧急数据的潜伏结点。

如果需要处理正常数据,通知相应的潜伏结点,并适当地调整缓冲区,将上次未处理的数据移至缓冲区起始处,如果nids_params.one_loop_less非零,libnids假定只有一个回调函数,于是循环直到没有未处理的新数据或者不再读取到数据。

add_from_skb

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/* Merge one segment's payload into rcv's reassembled stream.
 * EXP_SEQ is the next expected sequence number; `lost` is how much of
 * this segment precedes it (already-delivered overlap to skip).
 * Handles the urgent pointer: data before it and after it go to the
 * normal buffer, the single urgent byte is delivered via urgdata.
 * A FIN moves the sender to FIN_SENT and, if the receiver is already
 * closing, schedules the stream for timed release. */
static void add_from_skb(struct tcp_stream *a_tcp, struct half_stream *rcv,
struct half_stream *snd,
u_char *data, int datalen,
u_int this_seq, char fin, char urg, u_int urg_ptr)
{
u_int lost = EXP_SEQ - this_seq;
int to_copy, to_copy2;

/* Record a new urgent pointer if it points past delivered data and
 * supersedes any previously seen one. */
if (urg && after(urg_ptr, EXP_SEQ - 1) &&
(!rcv->urg_seen || after(urg_ptr, rcv->urg_ptr)))
{
rcv->urg_ptr = urg_ptr;
rcv->urg_seen = 1;
}
/* Pending urgent pointer falls inside this segment: split delivery
 * around the urgent byte. */
if (rcv->urg_seen && after(rcv->urg_ptr + 1, this_seq + lost) &&
before(rcv->urg_ptr, this_seq + datalen))
{
/* Normal data before the urgent byte: [EXP_SEQ, urg_ptr). */
to_copy = rcv->urg_ptr - (this_seq + lost);
if (to_copy > 0)
{
if (rcv->collect)
{
add2buf(rcv, (char *)(data + lost), to_copy);
notify(a_tcp, rcv);
}
else
{
rcv->count += to_copy;
rcv->offset = rcv->count; /* clear the buffer */
}
}
/* Deliver the single urgent byte out of band. */
rcv->urgdata = data[rcv->urg_ptr - this_seq];
rcv->count_new_urg = 1;
notify(a_tcp, rcv);
rcv->count_new_urg = 0;
rcv->urg_seen = 0;
rcv->urg_count++;
/* Normal data after the urgent byte: (urg_ptr, this_seq+datalen). */
to_copy2 = this_seq + datalen - rcv->urg_ptr - 1;
if (to_copy2 > 0)
{
if (rcv->collect)
{
add2buf(rcv, (char *)(data + lost + to_copy + 1), to_copy2);
notify(a_tcp, rcv);
}
else
{
rcv->count += to_copy2;
rcv->offset = rcv->count; /* clear the buffer */
}
}
}
else
{
/* No urgent byte in range: deliver everything past EXP_SEQ. */
if (datalen - lost > 0)
{
if (rcv->collect)
{
add2buf(rcv, (char *)(data + lost), datalen - lost);
notify(a_tcp, rcv);
}
else
{
rcv->count += datalen - lost;
rcv->offset = rcv->count; /* clear the buffer */
}
}
}
if (fin)
{
snd->state = FIN_SENT;
if (rcv->state == TCP_CLOSING)
add_tcp_closing_timeout(a_tcp);
}
}

接收方从skbuff中添加数据,EXP_SEQ是接收方上次发送的ACK

首先判断是否设置紧急标志,紧急指针在EXP_SEQ(含)之后,并且接收方之前没有看到紧急标志或者本次紧急指针在上次设置的紧急指针之后,如果条件为真,设置紧急指针的相应变量。

再判断是否看到紧急标志,且紧急指针在这次数据中,如果满足条件,设置需要处理的数据大小to_copy,紧急数据的范围是[EXP_SEQ, rcv->urg_ptr),如果大于0,再判断rcv->collect,如果需要存储数据,则将EXP_SEQ之后的to_copy字节其加入缓冲区,再通知相关连接,否则调整rcv->count并清空缓冲区,根据rcv->offset定义,它是缓冲区第一字节在TCP流中的偏移,将其设为接收方收到的总字节数,意味着缓冲区已经指向了数据流末尾,没有数据。接下来rcv->urgdata存储了一字节紧急数据,调整变量,设置正常数据大小to_copy2,其范围是(rcv->urg_ptr, this_seq + datalen)。

如果先前的判断为假,再判断本次数据是否包含EXP_SEQ之后的数据,有则加入缓冲区。

如果设置FIN,修改snd状态为FIN_SENT,如果rcv状态已经是CLOSING,则将a_tcp加入超时关闭的链表中。

tcp_queue

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/* Process one data-bearing segment for the rcv direction.
 * In-order (or overlapping) segments are merged into the stream via
 * add_from_skb(), after which any queued out-of-order segments made
 * contiguous are drained from rcv->list. Out-of-order segments are
 * copied and inserted into rcv->list, which is kept sorted by seq. */
static void tcp_queue(struct tcp_stream *a_tcp, struct tcphdr *this_tcphdr,
struct half_stream *snd, struct half_stream *rcv,
char *data, int datalen, int skblen)
{
u_int this_seq = ntohl(this_tcphdr->th_seq);
struct skbuff *pakiet, *tmp;

/*
 * Did we get anything new to ack?
 */

if (!after(this_seq, EXP_SEQ))
{
/* Segment starts at or before the expected sequence number. */
if (after(this_seq + datalen + (this_tcphdr->th_flags & TH_FIN), EXP_SEQ))
{
/* the packet straddles our window end */
get_ts(this_tcphdr, &snd->curr_ts);
add_from_skb(a_tcp, rcv, snd, (u_char *)data, datalen, this_seq,
(this_tcphdr->th_flags & TH_FIN),
(this_tcphdr->th_flags & TH_URG),
ntohs(this_tcphdr->th_urp) + this_seq - 1);
/*
 * Do we have any old packets to ack that the above
 * made visible? (Go forward from skb)
 */
pakiet = rcv->list;
while (pakiet)
{
if (after(pakiet->seq, EXP_SEQ))
break; /* still a gap before this one; stop draining */
if (after(pakiet->seq + pakiet->len + pakiet->fin, EXP_SEQ))
{
/* Queued segment extends past EXP_SEQ: merge it in. */
add_from_skb(a_tcp, rcv, snd, pakiet->data,
pakiet->len, pakiet->seq, pakiet->fin, pakiet->urg,
pakiet->urg_ptr + pakiet->seq - 1);
}
/* Either merged or fully duplicate: unlink and free it. */
rcv->rmem_alloc -= pakiet->truesize;
if (pakiet->prev)
pakiet->prev->next = pakiet->next;
else
rcv->list = pakiet->next;
if (pakiet->next)
pakiet->next->prev = pakiet->prev;
else
rcv->listtail = pakiet->prev;
tmp = pakiet->next;
free(pakiet->data);
free(pakiet);
pakiet = tmp;
}
}
else
return; /* entirely old data: nothing to do */
}
else
{
/* Segment is ahead of EXP_SEQ: buffer a copy until the gap fills. */
struct skbuff *p = rcv->listtail;

pakiet = mknew(struct skbuff);
pakiet->truesize = skblen;
rcv->rmem_alloc += pakiet->truesize;
pakiet->len = datalen;
pakiet->data = malloc(datalen);
if (!pakiet->data)
nids_params.no_mem("tcp_queue");
memcpy(pakiet->data, data, datalen);
pakiet->fin = (this_tcphdr->th_flags & TH_FIN);
/* Some Cisco - at least - hardware accept to close a TCP connection
 * even though packets were lost before the first TCP FIN packet and
 * never retransmitted; this violates RFC 793, but since it really
 * happens, it has to be dealt with... The idea is to introduce a 10s
 * timeout after TCP FIN packets were sent by both sides so that
 * corresponding libnids resources can be released instead of waiting
 * for retransmissions which will never happen. -- Sebastien Raveau
 */
if (pakiet->fin)
{
snd->state = TCP_CLOSING;
if (rcv->state == FIN_SENT || rcv->state == FIN_CONFIRMED)
add_tcp_closing_timeout(a_tcp);
}
pakiet->seq = this_seq;
pakiet->urg = (this_tcphdr->th_flags & TH_URG);
pakiet->urg_ptr = ntohs(this_tcphdr->th_urp);
/* Walk backwards from the tail to the last node with seq <= this_seq. */
for (;;)
{
if (!p || !after(p->seq, this_seq))
break;
p = p->prev;
}
if (!p)
{
/* Insert at the head of the (possibly empty) queue. */
pakiet->prev = 0;
pakiet->next = rcv->list;
if (rcv->list)
rcv->list->prev = pakiet;
rcv->list = pakiet;
if (!rcv->listtail)
rcv->listtail = pakiet;
}
else
{
/* Insert after p, fixing the tail when p was last. */
pakiet->next = p->next;
p->next = pakiet;
pakiet->prev = p;
if (pakiet->next)
pakiet->next->prev = pakiet;
else
rcv->listtail = pakiet;
}
}
}

如果this_seq <= EXP_SEQ,再判断是否this_seq + datalen + (this_tcphdr->th_flags & TH_FIN) > EXP_SEQ,也就是判断本次数据中是否有未确认的数据,如果有,设置时间戳并从skbuff中添加数据。接下来处理之前未被确认的数据(如果有)。

  • 如果TCP队列中有数据,遍历链表,将EXP_SEQ在[pakiet->seq, pakiet->seq + pakiet->len + pakiet->fin]中的数据加入,如果EXP_SEQ <= pakiet->seq,以后再处理这些报文段,如果pakiet->seq + pakiet->len + pakiet->fin <= EXP_SEQ,该报文段已经处理过。然后调整未处理报文段的链表并释放所占用的空间。

如果这次数据均已被确认,直接返回。

现在处理报文段序列号在ACK之后的情况,等待确认,为报文段分配相应空间,调整rcv->rmem_alloc,如果设置FIN,调整发送方状态为TCP_CLOSING,如果接收方状态是FIN_SENT或者FIN_CONFIRMED,将该连接加入超时关闭的队列。剩下的工作是将该报文段插入缓存的TCP队列中,根据序列号从小到大排序。

handle_ack

1
2
3
4
5
6
7
8
9
10
/* Advance snd's recorded acknowledgment number when acknum is newer.
 * The comparison uses a signed difference so it stays correct across
 * 32-bit sequence-number wrap-around. */
static void handle_ack(struct half_stream *snd, u_int acknum)
{
    int delta = acknum - snd->ack_seq;

    if (delta > 0)
        snd->ack_seq = acknum;
}

如果确认了更新的数据,调整ACK

find_stream

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
struct tcp_stream *find_stream(struct tcphdr *this_tcphdr, struct ip *this_iphdr,
int *from_client)
{
struct tuple4 this_addr, reversed;
struct tcp_stream *a_tcp;

this_addr.source = ntohs(this_tcphdr->th_sport);
this_addr.dest = ntohs(this_tcphdr->th_dport);
this_addr.saddr = this_iphdr->ip_src.s_addr;
this_addr.daddr = this_iphdr->ip_dst.s_addr;
a_tcp = nids_find_tcp_stream(&this_addr);
if (a_tcp)
{
*from_client = 1;
return a_tcp;
}
reversed.source = ntohs(this_tcphdr->th_dport);
reversed.dest = ntohs(this_tcphdr->th_sport);
reversed.saddr = this_iphdr->ip_dst.s_addr;
reversed.daddr = this_iphdr->ip_src.s_addr;
a_tcp = nids_find_tcp_stream(&reversed);
if (a_tcp)
{
*from_client = 0;
return a_tcp;
}
return 0;
}

根据TCP首部和IP首部,调用nids_find_tcp_stream在哈希表中寻找连接,this_addr是与this_tcphdr同方向的四元组,reversed则是反向,先寻找同向的连接,没找到则寻找反向连接,仍未找到则返回0。

nids_find_tcp_stream

1
2
3
4
5
6
7
8
9
10
11
/* Look up a stream by its exact 4-tuple: hash it, then walk the
 * bucket's chain until the tuple matches. Returns 0 when absent. */
struct tcp_stream *nids_find_tcp_stream(struct tuple4 *addr)
{
    struct tcp_stream *walker;
    int bucket = mk_hash_index(*addr);

    for (walker = tcp_stream_table[bucket]; walker; walker = walker->next_node)
    {
        if (memcmp(&walker->addr, addr, sizeof (struct tuple4)) == 0)
            return walker;
    }
    return 0;
}

计算哈希值,在对应位置的链表中顺序搜索addr,没有则返回0。

tcp_exit

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Tear down all TCP tracking state: notify every listener of every
 * remaining stream with NIDS_EXITING, free each stream, then release
 * the hash table and the stream pool and reset the globals. */
void tcp_exit(void)
{
int i;
struct lurker_node *j;
struct tcp_stream *a_tcp, *t_tcp;

if (!tcp_stream_table || !streams_pool)
return;
for (i = 0; i < tcp_stream_table_size; i++)
{
a_tcp = tcp_stream_table[i];
while (a_tcp)
{
/* Save the chain successor before freeing the current stream. */
t_tcp = a_tcp;
a_tcp = a_tcp->next_node;
for (j = t_tcp->listeners; j; j = j->next)
{
t_tcp->nids_state = NIDS_EXITING;
(j->item)(t_tcp, &j->data);
}
nids_free_tcp_stream(t_tcp);
}
}
free(tcp_stream_table);
tcp_stream_table = NULL;
free(streams_pool);
streams_pool = NULL;
/* FIXME: anything else we should free? */
/* yes plz.. */
tcp_latest = tcp_oldest = NULL;
tcp_num = 0;
}

遍历哈希表,将TCP连接的nids状态设为NIDS_EXITING并调用所有的潜伏结点进行处理,然后释放该连接。释放哈希表,释放为流分配的空间,调整相关变量。

process_tcp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
/* Main TCP packet handler. data points at the IP header; skblen is
 * the accounted buffer size. Validates the headers and checksum,
 * creates/advances the connection state machine (SYN handshake, RST,
 * PAWS, FIN teardown), queues payload for reassembly, and notifies or
 * frees the stream as appropriate. */
void process_tcp(u_char *data, int skblen)
{
struct ip *this_iphdr = (struct ip *)data;
struct tcphdr *this_tcphdr = (struct tcphdr *)(data + 4 * this_iphdr->ip_hl);
int datalen, iplen;
int from_client = 1;
unsigned int tmp_ts;
struct tcp_stream *a_tcp;
struct half_stream *snd, *rcv;

ugly_iphdr = this_iphdr;
iplen = ntohs(this_iphdr->ip_len);
/* Sanity: the packet must at least hold a full TCP header. */
if ((unsigned)iplen < 4 * this_iphdr->ip_hl + sizeof(struct tcphdr))
{
nids_params.syslog(NIDS_WARN_TCP, NIDS_WARN_TCP_HDR, this_iphdr,
this_tcphdr);
return;
} // malformed header - someone is playing games ("ktos sie bawi")

datalen = iplen - 4 * this_iphdr->ip_hl - 4 * this_tcphdr->th_off;

if (datalen < 0)
{
nids_params.syslog(NIDS_WARN_TCP, NIDS_WARN_TCP_HDR, this_iphdr,
this_tcphdr);
return;
} // malformed header - someone is playing games ("ktos sie bawi")

/* Reject all-zero source and destination addresses. */
if ((this_iphdr->ip_src.s_addr | this_iphdr->ip_dst.s_addr) == 0)
{
nids_params.syslog(NIDS_WARN_TCP, NIDS_WARN_TCP_HDR, this_iphdr,
this_tcphdr);
return;
}
/* Packets without ACK feed the port-scan detector. */
if (!(this_tcphdr->th_flags & TH_ACK))
detect_scan(this_iphdr);
if (!nids_params.n_tcp_streams) return;
/* Drop packets with a bad TCP checksum. */
if (my_tcp_check(this_tcphdr, iplen - 4 * this_iphdr->ip_hl,
this_iphdr->ip_src.s_addr, this_iphdr->ip_dst.s_addr))
{
nids_params.syslog(NIDS_WARN_TCP, NIDS_WARN_TCP_HDR, this_iphdr,
this_tcphdr);
return;
}
#if 0
check_flags(this_iphdr, this_tcphdr);
//ECN
#endif
/* Unknown connection: only a bare SYN (no ACK/RST) starts tracking. */
if (!(a_tcp = find_stream(this_tcphdr, this_iphdr, &from_client)))
{
if ((this_tcphdr->th_flags & TH_SYN) &&
!(this_tcphdr->th_flags & TH_ACK) &&
!(this_tcphdr->th_flags & TH_RST))
add_new_tcp(this_tcphdr, this_iphdr);
return;
}
/* snd = the half that sent this packet, rcv = the half receiving it. */
if (from_client)
{
snd = &a_tcp->client;
rcv = &a_tcp->server;
}
else
{
rcv = &a_tcp->client;
snd = &a_tcp->server;
}
if ((this_tcphdr->th_flags & TH_SYN))
{
/* For a tracked stream, only the server's SYN+ACK (second segment
 * of the handshake) is acceptable here. */
if (from_client || a_tcp->client.state != TCP_SYN_SENT ||
a_tcp->server.state != TCP_CLOSE || !(this_tcphdr->th_flags & TH_ACK))
return;
/* The SYN+ACK must acknowledge the client's ISN + 1. */
if (a_tcp->client.seq != ntohl(this_tcphdr->th_ack))
return;
a_tcp->server.state = TCP_SYN_RECV;
a_tcp->server.seq = ntohl(this_tcphdr->th_seq) + 1;
a_tcp->server.first_data_seq = a_tcp->server.seq;
a_tcp->server.ack_seq = ntohl(this_tcphdr->th_ack);
a_tcp->server.window = ntohs(this_tcphdr->th_win);
/* Options are only enabled when both sides agree. */
if (a_tcp->client.ts_on)
{
a_tcp->server.ts_on = get_ts(this_tcphdr, &a_tcp->server.curr_ts);
if (!a_tcp->server.ts_on)
a_tcp->client.ts_on = 0;
}
else a_tcp->server.ts_on = 0;
if (a_tcp->client.wscale_on)
{
a_tcp->server.wscale_on = get_wscale(this_tcphdr, &a_tcp->server.wscale);
if (!a_tcp->server.wscale_on)
{
a_tcp->client.wscale_on = 0;
a_tcp->client.wscale = 1;
a_tcp->server.wscale = 1;
}
}
else
{
a_tcp->server.wscale_on = 0;
a_tcp->server.wscale = 1;
}
return;
}
/* Window check: accept bare keep-alive-style ACKs at the expected
 * seq; otherwise discard segments entirely outside the receive
 * window or entirely already acknowledged. */
if (
! (!datalen && ntohl(this_tcphdr->th_seq) == rcv->ack_seq)
&&
(!before(ntohl(this_tcphdr->th_seq), rcv->ack_seq + rcv->window * rcv->wscale) ||
before(ntohl(this_tcphdr->th_seq) + datalen, rcv->ack_seq)
)
)
return;

/* RST: give listeners one final callback, then free the stream. */
if ((this_tcphdr->th_flags & TH_RST))
{
if (a_tcp->nids_state == NIDS_DATA)
{
struct lurker_node *i;

a_tcp->nids_state = NIDS_RESET;
for (i = a_tcp->listeners; i; i = i->next)
(i->item)(a_tcp, &i->data);
}
nids_free_tcp_stream(a_tcp);
return;
}

/* PAWS check: drop segments carrying a timestamp older than the
 * sender's most recent one. */
if (rcv->ts_on && get_ts(this_tcphdr, &tmp_ts) &&
before(tmp_ts, snd->curr_ts))
return;

if ((this_tcphdr->th_flags & TH_ACK))
{
/* Third segment of the handshake: client ACKs the SYN+ACK. */
if (from_client && a_tcp->client.state == TCP_SYN_SENT &&
a_tcp->server.state == TCP_SYN_RECV)
{
if (ntohl(this_tcphdr->th_ack) == a_tcp->server.seq)
{
a_tcp->client.state = TCP_ESTABLISHED;
a_tcp->client.ack_seq = ntohl(this_tcphdr->th_ack);
{
struct proc_node *i;
struct lurker_node *j;
void *data;

a_tcp->server.state = TCP_ESTABLISHED;
a_tcp->nids_state = NIDS_JUST_EST;
/* Offer the new connection to every registered callback and
 * build a lurker node for each one that subscribed. */
for (i = tcp_procs; i; i = i->next)
{
char whatto = 0;
char cc = a_tcp->client.collect;
char sc = a_tcp->server.collect;
char ccu = a_tcp->client.collect_urg;
char scu = a_tcp->server.collect_urg;

(i->item)(a_tcp, &data);
if (cc < a_tcp->client.collect)
whatto |= COLLECT_cc;
if (ccu < a_tcp->client.collect_urg)
whatto |= COLLECT_ccu;
if (sc < a_tcp->server.collect)
whatto |= COLLECT_sc;
if (scu < a_tcp->server.collect_urg)
whatto |= COLLECT_scu;
if (nids_params.one_loop_less)
{
if (a_tcp->client.collect >= 2)
{
a_tcp->client.collect = cc;
whatto &= ~COLLECT_cc;
}
if (a_tcp->server.collect >= 2)
{
a_tcp->server.collect = sc;
whatto &= ~COLLECT_sc;
}
}
if (whatto)
{
j = mknew(struct lurker_node);
j->item = i->item;
j->data = data;
j->whatto = whatto;
j->next = a_tcp->listeners;
a_tcp->listeners = j;
}
}
/* Nobody cares about this connection: drop it now. */
if (!a_tcp->listeners)
{
nids_free_tcp_stream(a_tcp);
return;
}
a_tcp->nids_state = NIDS_DATA;
}
}
// return;
}
}
if ((this_tcphdr->th_flags & TH_ACK))
{
handle_ack(snd, ntohl(this_tcphdr->th_ack));
/* ACK of a FIN confirms it; when both sides are confirmed the
 * connection closed cleanly. */
if (rcv->state == FIN_SENT)
rcv->state = FIN_CONFIRMED;
if (rcv->state == FIN_CONFIRMED && snd->state == FIN_CONFIRMED)
{
struct lurker_node *i;

a_tcp->nids_state = NIDS_CLOSE;
for (i = a_tcp->listeners; i; i = i->next)
(i->item)(a_tcp, &i->data);
nids_free_tcp_stream(a_tcp);
return;
}
}
/* Payload (or a FIN) goes through reassembly. */
if (datalen + (this_tcphdr->th_flags & TH_FIN) > 0)
tcp_queue(a_tcp, this_tcphdr, snd, rcv,
(char *) (this_tcphdr) + 4 * this_tcphdr->th_off,
datalen, skblen);
snd->window = ntohs(this_tcphdr->th_win);
/* Bound the out-of-order queue; drop it when it grows too large. */
if (rcv->rmem_alloc > 65535)
prune_queue(rcv, this_tcphdr);
if (!a_tcp->listeners)
nids_free_tcp_stream(a_tcp);
}

处理TCP连接,data参数指向IP包的起始处,TCP包的偏移在this_iphdr->ip_hl * 4。检测IP包的长度,检测TCP数据的大小,检测IP全0的情况,如果没有设置ACK,调用detect_scan检测扫描。然后如果有必要,进行校验和计算。

如果没有找到该连接,如果是发起连接建立请求的报文段,那么添加新的TCP连接,否则返回。然后根据from_client决定报文段方向,client->server或者server->client。

如果设置了SYN,这是三次握手的第二个报文段,因为如果是第一个报文段,那么在之前已经处理(通过add_new_tcp添加新的TCP连接)。此时client应该处于SYN_SENT,server处于CLOSED,并且设置ACK,如果不满足任一条件那么连接建立失败。第二个报文段需要对第一个报文段进行确认,如果ack与ISN(c)+1不同,连接建立失败。一切正常的话,将server状态转到SYN_RECV,设定第一字节序列号与确认号及其他选项,如果client与server中有一端不支持时间戳或者窗口扩大选项,则关闭对应选项。

如果数据长度不为0或者序列号不等于确认号,并且报文段序列号在接收窗口之外或者整个报文段数据均已确认,则直接返回。

如果设置了RST标志,调用所有的潜伏结点最后一次处理该TCP连接,然后释放掉该连接并返回。

接下来PAWS(Protection Against Wrapping Sequence),处理序列号回绕的情况,根据时间戳(如果有),如果报文段时间戳在发送方当前时间之前,那么这是之前迷失在网络中的旧报文段迟到了,这种情况下直接丢弃。

如果设置了ACK,判断是否为三次握手的第三个报文段,再判断确认号是否正确,如果正确,那么TCP连接就此建立,修改nids状态为NIDS_JUST_EST,调用回调函数,这里用未初始化的局部data隐藏了形参data,如果该回调函数处理数据,修改whatto,如果设置了nids_params.one_loop_less,那么只允许一个回调函数处理TCP连接,再根据whatto建立潜伏结点,如果该连接没有潜伏结点来处理,释放为该连接分配的资源。最后修改nids状态为NIDS_DATA

再次判断是否设置ACK,并调整确认号,如果接收方状态为FIN_SENT,那么这已经是四次挥手第二个报文段,将接收方状态修改为FIN_CONFIRMED,接下来判断是否完成四次挥手,满足条件就释放该连接。

如果有任何数据或者是FIN报文段,将其加入TCP队列等待处理。

接下来设置发送方的窗口,如果接收方缓存的数据过多,将其丢弃。如果该TCP连接不再被监听,则将其释放。

nids_discard

1
2
3
4
5
/* Let a callback declare that it consumed only num bytes of the data
 * presented in this round; a_tcp->read is lowered accordingly. The
 * value can only shrink — larger requests are ignored. */
void nids_discard(struct tcp_stream *a_tcp, int num)
{
    if (a_tcp->read > num)
        a_tcp->read = num;
}

如果满足条件,修改已读取的数据为num

process_icmp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/* Handle an ICMP destination-unreachable message: if it quotes a TCP
 * segment belonging to a tracked connection that is still in its
 * handshake, treat the connection as reset — notify its listeners and
 * free it. All other ICMP traffic is ignored. */
void process_icmp(u_char *data)
{
struct ip *iph = (struct ip *) data;
struct ip *orig_ip;
STRUCT_ICMP *pkt;
struct tcphdr *th;
struct half_stream *hlf;
int match_addr;
struct tcp_stream *a_tcp;
struct lurker_node *i;

int from_client;
/* we will use unsigned, to suppress warning; we must be careful with
possible wrap when substracting
the following is ok, as the ip header has already been sanitized */
unsigned int len = ntohs(iph->ip_len) - (iph->ip_hl << 2);

if (len < sizeof(STRUCT_ICMP))
return;
pkt = (STRUCT_ICMP *) (data + (iph->ip_hl << 2));
/* Verify the ICMP checksum; drop corrupted messages. */
if (ip_compute_csum((char *) pkt, len))
return;
if (pkt->ICMP_TYPE != ICMP_DEST_UNREACH)
return;
/* ok due to check 7 lines above */
len -= sizeof(STRUCT_ICMP);
// sizeof(struct icmp) is not what we want here

/* The message must quote at least a full IP header... */
if (len < sizeof(struct ip))
return;

orig_ip = (struct ip *) (((char *) pkt) + 8);
/* ...plus the first 8 bytes of the original transport header. */
if (len < (unsigned)(orig_ip->ip_hl << 2) + 8)
return;
/* subtraction ok due to the check above */
len -= orig_ip->ip_hl << 2;
/* For protocol/port unreachable, require the ICMP source to be the
 * original packet's destination. */
if ((pkt->ICMP_CODE & 15) == ICMP_PROT_UNREACH ||
(pkt->ICMP_CODE & 15) == ICMP_PORT_UNREACH)
match_addr = 1;
else
match_addr = 0;
if (pkt->ICMP_CODE > NR_ICMP_UNREACH)
return;
if (match_addr && (iph->ip_src.s_addr != orig_ip->ip_dst.s_addr))
return;
if (orig_ip->ip_p != IPPROTO_TCP)
return;
th = (struct tcphdr *) (((char *) orig_ip) + (orig_ip->ip_hl << 2));
if (!(a_tcp = find_stream(th, orig_ip, &from_client)))
return;
/* Pick the half whose traffic the ICMP message refers to. */
if (a_tcp->addr.dest == iph->ip_dst.s_addr)
hlf = &a_tcp->server;
else
hlf = &a_tcp->client;
/* Only abort connections still in the handshake. */
if (hlf->state != TCP_SYN_SENT && hlf->state != TCP_SYN_RECV)
return;
a_tcp->nids_state = NIDS_RESET;
for (i = a_tcp->listeners; i; i = i->next)
(i->item)(a_tcp, &i->data);
nids_free_tcp_stream(a_tcp);
}

处理目的不可达导致的ICMP报文,为该TCP连接调用潜伏结点并释放。

github其他关于libnids的注释

zhyq/Libnids,本文部分参考其中tcp.c的注释。

------ 本文结束 ------

版权声明

Memory is licensed under a Creative Commons BY-NC-SA 4.0 International License.
博客采用知识共享署名(BY)-非商业性(NC)-相同方式共享(SA)
本文首发于Memory,转载请保留出处。