0%

Blogs

Go Slices: usage and internals
Arrays, slices (and strings): The mechanics of 'append'

Details

make

Look up in $GOROOT/src/runtime/slice.go:makeslice.

append

https://stackoverflow.com/a/33405824/13133551

slicing

The length is the number of elements referred to by the slice. The capacity is the number of elements in the underlying array (beginning at the element referred to by the slice pointer).

copy

Look up in $GOROOT/src/runtime/slice.go:slicecopy.

The copy function supports copying between slices of different lengths (it will copy only up to the smaller number of elements). In addition, copy can handle source and destination slices that share the same underlying array, handling overlapping slices correctly.

Codes

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package main

import (
"os"

logger "github.com/sirupsen/logrus"
"github.com/x-cray/logrus-prefixed-formatter"
)

func assertPanic(v bool) {
if !v {
panic("v is not true")
}
}

func init() {
logger.SetFormatter(&prefixed.TextFormatter{
TimestampFormat: "2006-01-02 15:04:05",
FullTimestamp: true,
ForceFormatting: true,
DisableColors: true,
})
logger.SetOutput(os.Stdout)
logger.SetLevel(logger.DebugLevel)
}

func sliceCopy() {
logger.Infof("in sliceCopy")
s := make([]int, 0)
for i := 0; i < 10; i++ {
s = append(s, i)
}

c := make([]int, 0)
copy(c, s)
logger.Infof("c: %v", c)

c = make([]int, 5)
copy(c, s)
logger.Infof("c: %v", c)

c = make([]int, 15)
copy(c, s)
logger.Infof("c: %v", c)

c = make([]int, 5, 20)
copy(c, s)
logger.Infof("c: %v", c)

c[0] = 256
logger.Infof("c: %v, s: %v", c, s)
}

func sliceAppend() {
logger.Infof("in sliceAppend")
s := make([]int, 0)
for i := 0; i < 10; i++ {
s = append(s, i)
logger.Infof("after appending: %v, len(s): %v, cap(s): %v", i, len(s), cap(s))
}
for i := 0; i < 10; i++ {
l := len(s)
c := cap(s)
after := append(s, i)
if l == c {
assertPanic(&s[0] != &after[0])
assertPanic(s[0] == after[0])

s[0] += 8
assertPanic(s[0] != after[0])
} else {
assertPanic(&s[0] == &after[0])
assertPanic(s[0] == after[0])

s[0] += 8
assertPanic(s[0] == after[0])
}
s = after
}
}

func resultOfAppend() {
logger.Infof("in resultOfAppend")
s := make([]int, 0, 64)
s = append(s, 1, 2, 3, 4, 5)

x := append(s, 16)
assertPanic(x[5] == 16)

y := append(s, 32)
assertPanic(x[5] == 32)
assertPanic(y[5] == 32)
}

func slicing() {
logger.Infof("in slicing")
s := make([]int, 0, 6)
s = append(s, 1, 2, 3, 4, 5)

y := s[1:3]
assertPanic(len(y) == 2)
assertPanic(cap(y) == 5)

y[0] = 10
assertPanic(s[1] == 10)

y = append(y, 8)
assertPanic(s[3] == 8)
assertPanic(y[2] == 8)

s[3] = 32
assertPanic(y[2] == 32)
}

func main() {
sliceCopy()
sliceAppend()
resultOfAppend()
slicing()
}

Result:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
[2020-03-27 16:04:59]  INFO in sliceCopy
[2020-03-27 16:04:59] INFO c: []
[2020-03-27 16:04:59] INFO c: [0 1 2 3 4]
[2020-03-27 16:04:59] INFO c: [0 1 2 3 4 5 6 7 8 9 0 0 0 0 0]
[2020-03-27 16:04:59] INFO c: [0 1 2 3 4]
[2020-03-27 16:04:59] INFO c: [256 1 2 3 4], s: [0 1 2 3 4 5 6 7 8 9]
[2020-03-27 16:04:59] INFO in sliceAppend
[2020-03-27 16:04:59] INFO after appending: 0, len(s): 1, cap(s): 1
[2020-03-27 16:04:59] INFO after appending: 1, len(s): 2, cap(s): 2
[2020-03-27 16:04:59] INFO after appending: 2, len(s): 3, cap(s): 4
[2020-03-27 16:04:59] INFO after appending: 3, len(s): 4, cap(s): 4
[2020-03-27 16:04:59] INFO after appending: 4, len(s): 5, cap(s): 8
[2020-03-27 16:04:59] INFO after appending: 5, len(s): 6, cap(s): 8
[2020-03-27 16:04:59] INFO after appending: 6, len(s): 7, cap(s): 8
[2020-03-27 16:04:59] INFO after appending: 7, len(s): 8, cap(s): 8
[2020-03-27 16:04:59] INFO after appending: 8, len(s): 9, cap(s): 16
[2020-03-27 16:04:59] INFO after appending: 9, len(s): 10, cap(s): 16
[2020-03-27 16:04:59] INFO in resultOfAppend
[2020-03-27 16:04:59] INFO in slicing

通过观察递归实现,用循环和栈模拟递归实现中结点入栈和出栈的过程。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#include <bits/stdc++.h>
#define DBG(x) cerr << #x << " = " << x << endl

using namespace std;
typedef long long LL;

struct Node {
int val;
Node *left, *right;

Node() : left(NULL), right(NULL) {}

Node(int val) : val(val), left(NULL), right(NULL) {}
};

enum StackState {
AFTER_PUSH_ROOT = 0,
AFTER_PUSH_LEFT = 1,
AFTER_PUSH_RIGHT = 2,
};

void dfs_pre_order(Node *root, vector<int> &ret) {
ret.push_back(root->val);
if (root->left) dfs_pre_order(root->left, ret);
if (root->right) dfs_pre_order(root->right, ret);
}

void dfs_in_order(Node *root, vector<int> &ret) {
if (root->left) dfs_in_order(root->left, ret);
ret.push_back(root->val);
if (root->right) dfs_in_order(root->right, ret);
}

void dfs_post_order(Node *root, vector<int> &ret) {
if (root->left) dfs_post_order(root->left, ret);
if (root->right) dfs_post_order(root->right, ret);
ret.push_back(root->val);
}

void pre_order(Node *root, vector<int> &ret) {
stack<pair<Node *, StackState>> stk;
stk.push(pair<Node *, StackState>(root, AFTER_PUSH_ROOT));
while (!stk.empty()) {
pair<Node *, StackState> &top = stk.top();
switch (top.second) {
case AFTER_PUSH_ROOT:
ret.push_back(top.first->val);
if (top.first->left) stk.push(pair<Node *, StackState>(top.first->left, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_LEFT;
break;
case AFTER_PUSH_LEFT:
if (top.first->right) stk.push(pair<Node *, StackState>(top.first->right, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_RIGHT;
break;
case AFTER_PUSH_RIGHT:
stk.pop();
break;
}
}
}

void in_order(Node *root, vector<int> &ret) {
stack<pair<Node *, StackState>> stk;
stk.push(pair<Node *, StackState>(root, AFTER_PUSH_ROOT));
while (!stk.empty()) {
pair<Node *, StackState> &top = stk.top();
switch (top.second) {
case AFTER_PUSH_ROOT:
if (top.first->left) stk.push(pair<Node *, StackState>(top.first->left, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_LEFT;
break;
case AFTER_PUSH_LEFT:
ret.push_back(top.first->val);
if (top.first->right) stk.push(pair<Node *, StackState>(top.first->right, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_RIGHT;
break;
case AFTER_PUSH_RIGHT:
stk.pop();
break;
}
}
}

void post_order(Node *root, vector<int> &ret) {
stack<pair<Node *, StackState>> stk;
stk.push(pair<Node *, StackState>(root, AFTER_PUSH_ROOT));
while (!stk.empty()) {
pair<Node *, StackState> &top = stk.top();
switch (top.second) {
case AFTER_PUSH_ROOT:
if (top.first->left) stk.push(pair<Node *, StackState>(top.first->left, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_LEFT;
break;
case AFTER_PUSH_LEFT:
if (top.first->right) stk.push(pair<Node *, StackState>(top.first->right, AFTER_PUSH_ROOT));
top.second = AFTER_PUSH_RIGHT;
break;
case AFTER_PUSH_RIGHT:
ret.push_back(top.first->val);
stk.pop();
break;
}
}
}

Node *get_tree(int num) {
Node *root = new Node(random());
vector<Node *> nodes;
nodes.push_back(root);

for (int i = 0; i < num; i++) {
unsigned int pos = random() * random() * random() % nodes.size();
if (nodes[pos]->left == NULL && nodes[pos]->right == NULL) {
if (random() % 2 == 0) {
nodes[pos]->left = new Node(random());
nodes.push_back(nodes[pos]->left);
} else {
nodes[pos]->right = new Node(random());
nodes.push_back(nodes[pos]->right);
}
} else if (nodes[pos]->left) {
nodes[pos]->right = new Node(random());
nodes.push_back(nodes[pos]->right);
} else {
nodes[pos]->left = new Node(random());
nodes.push_back(nodes[pos]->left);
}
if (nodes[pos]->left && nodes[pos]->right) {
nodes.erase(nodes.begin() + pos);
}
}

return root;
}

void free_tree(Node *root) {
if (root->left) free_tree(root->left);
if (root->right) free_tree(root->right);
delete root;
}

void test_pre_order() {
Node *tree = get_tree(100000);
vector<int> dfs_ret, ret;

dfs_pre_order(tree, dfs_ret);
pre_order(tree, ret);
assert(dfs_ret == ret);

free_tree(tree);
}

void test_in_order() {
Node *tree = get_tree(100000);
vector<int> dfs_ret, ret;

dfs_in_order(tree, dfs_ret);
in_order(tree, ret);
assert(dfs_ret == ret);

free_tree(tree);
}

void test_post_order() {
Node *tree = get_tree(100000);
vector<int> dfs_ret, ret;

dfs_post_order(tree, dfs_ret);
post_order(tree, ret);
assert(dfs_ret == ret);

free_tree(tree);
}

int main(int argc, char **argv) {
srand(time(NULL));

test_pre_order();
test_in_order();
test_post_order();

return 0;
}

https://leetcode.com/problems/median-of-two-sorted-arrays/

函数\(kth\)表示归并两个数组时得到的第\(k\)个元素,函数的计算过程为,每次从\(A\)\(B\)数组的其中一个数组中,找出\(p\)个元素,这\(p\)个元素不大于归并得到的第\(k\)个元素,接着把这\(p\)个元素排除掉,继续在\(A\)\(B\)数组的剩余部分找归并\(A\)\(B\)数组剩余部分后得到的第\(k-p\)个元素。 特殊地,当\(k=1\)或有一个数组为空时可以直接得到答案。 每次找到一个\(p \in [1, \lfloor \frac{k}{2} \rfloor]\),假设\(A[0:p]\)\(B[0:p]\)都在归并的结果内,比较\(A_p\)\(B_p\),如果\(A_p < B_p\)则说明\(A\)中前\(p\)个元素都不大于归并后的第\(k\)个元素,否则说明\(B\)中前\(p\)个元素都不大于归并后的第\(k\)个元素。

\(p\)\(\lfloor \frac{k}{2} \rfloor\)时时间复杂度为\(O(log(k)) = O(log(n + m))\)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class Solution {
public:
double findMedianSortedArrays(vector<int>& nums1, vector<int>& nums2) {
int n = nums1.size() + nums2.size();
if (n % 2 == 1)
return kth(nums1, 0, nums2, 0, n / 2 + 1);
else
return (kth(nums1, 0, nums2, 0, n / 2) + kth(nums1, 0, nums2, 0, n / 2 + 1)) / 2.0;
}
int kth(vector<int> &nums1, int n1, vector<int> &nums2, int n2, int k) {
if (nums1.size() - n1 > nums2.size() - n2)
return kth(nums2, n2, nums1, n1, k);
if (nums1.size() - n1 == 0)
return nums2[n2 + k - 1];
if (k == 1)
return min(nums1[n1], nums2[n2]);

int p = min(k / 2, int(nums1.size() - n1));
if (nums1[n1 + p - 1] < nums2[n2 + p - 1])
return kth(nums1, n1 + p, nums2, n2, k - p);
else
return kth(nums1, n1, nums2, n2 + p, k - p);
}
};

1
2
$ uname -ra
Linux Rapture 4.15.0-72-generic #81~16.04.1-Ubuntu SMP Tue Nov 26 16:34:21 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux

signal.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#include <cstdio>
#include <cstring>
#include <cstdlib>

#include <time.h>
#include <unistd.h>
#include <signal.h>

using namespace std;

void handle(int num) {
printf("signal num: %d\n", num);
sleep(5);
printf("sleep over\n");
}

int main(int argc, char *argv[]) {
for (int i = 1; i < argc;) {
if (strcmp(argv[i], "--handle") == 0) {
signal(atoi(argv[i + 1]), handle);
i += 2;
continue;
}
i++;
}

printf("pid: %d\n", getpid());

while (true) {
printf("%d\n", time(NULL));
sleep(1);
}
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
$ ./signal &
[1] 6223
pid: 6223
1577155844
1577155845
$ kill -KILL 6223
[1] + 6223 killed ./signal

$ ./signal --handle 9 &
[1] 6261
pid: 6261
1577155864
1577155865
$ kill -KILL 6261
[1] + 6261 killed ./signal --handle 9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
$ ./signal &
[1] 6471
pid: 6471
1577156088
1577156089
$ kill -SIGSTOP 6471
[1] + 6471 suspended (signal) ./signal
......
$ kill -SIGCONT 6471
1577156113
1577156114
1577156115
1577156116
1577156117

$ ./signal --handle 18 19 &
[1] 6566
pid: 6566
1577156249
1577156250
$ kill -SIGSTOP 6566
[1] + 6566 suspended (signal) ./signal --handle 18 19
......
$ kill -SIGCONT 6566
signal num: 18
sleep over
1577156270
1577156271

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
$ ./signal --handle 15 &
[1] 7148
pid: 7148
1577156583
1577156584
1577156585
$ kill -SIGTERM 7148
$ kill -SIGTERM 7148
$ kill -SIGTERM 7148
$ kill -SIGTERM 7148
$ kill -SIGTERM 7148
signal num: 15
sleep over
signal num: 15
sleep over
1577156597
1577156598
1577156599
1577156600
1577156601
......
$ kill -KILL 7148
[1] + 7148 killed ./signal --handle 15

wait.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <cstdio>
#include <cstring>
#include <cstdlib>

#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

using namespace std;

void handle(int num) {
printf("signal num: %d\n", num);
}

int main(int argc, char *argv[]) {
int options = 0;
bool divide_zero = false;
bool invalid_memory = false;

for (int i = 1; i < argc;) {
if (strcmp(argv[i], "--WUNTRACED") == 0) {
options |= WUNTRACED;
i++;
continue;
} else if (strcmp(argv[i], "--WNOHANG") == 0) {
options |= WNOHANG;
i++;
continue;
} else if (strcmp(argv[i], "--WCONTINUED") == 0) {
options |= WCONTINUED;
i++;
continue;
} else if (strcmp(argv[i], "--handle") == 0) {
signal(atoi(argv[i + 1]), handle);
i += 2;
continue;
} else if (strcmp(argv[i], "--divide-zero") == 0) {
divide_zero = true;
i++;
continue;
} else if (strcmp(argv[i], "--invalid-memory") == 0) {
invalid_memory = true;
i++;
continue;
}
i++;
}

pid_t cpid, w;
int status;

cpid = fork();
if (cpid == -1) {
fprintf(stderr, "fork failed\n");
exit(EXIT_FAILURE);
}

if (cpid == 0) {
printf("Child PID is %ld\n", (long) getpid());
if (divide_zero) {
int x = 1337;
int y = x / 0;
printf("%d\n", y);
}
if (invalid_memory) {
*(int *) 0 = 1337;
}
while (true) {
printf("time: %d\n", time(NULL));
sleep(1);
}
} else {
printf("Father PID is %ld\n", (long) getpid());
do {
w = waitpid(cpid, &status, options);
printf("w: %d, WIFEXITED: %d, WIFSIGNALED: %d, WIFSTOPPED: %d, WIFCONTINUED: %d\n",
w, WIFEXITED(status), WIFSIGNALED(status), WIFSTOPPED(status), WIFCONTINUED(status));
if (w == 0) {
continue;
}
if (w == -1) {
fprintf(stderr, "waitpid error\n");
exit(EXIT_FAILURE);
}
if (WIFEXITED(status)) {
printf("exited, status=%d\n", WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
printf("killed by signal %d\n", WTERMSIG(status));
} else if (WIFSTOPPED(status)) {
printf("stopped by signal %d\n", WSTOPSIG(status));
} else if (WIFCONTINUED(status)) {
printf("continued\n");
}
} while ((!WIFEXITED(status) && !WIFSIGNALED(status)) || w == 0);
exit(EXIT_SUCCESS);
}
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
$ ./wait &
[1] 7523
Father PID is 7523
Child PID is 7525
time: 1577156913
time: 1577156914
time: 1577156915
time: 1577156916
$ kill 7525
w: 7525, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 15

$ ./wait --handle 15
Father PID is 7566
Child PID is 7567
time: 1577156938
time: 1577156939
time: 1577156940
time: 1577156941
$ kill 7567
signal num: 15
time: 1577156941
time: 1577156942
$ kill -KILL 7567
w: 7567, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 9
1
2
3
4
5
6
7
8
9
10
11
12
13
$ ./wait --divide-zero
Child PID is 7744
Father PID is 7743
w: 7744, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 8

$ ./wait --divide-zero --handle 8
Father PID is 7716
Child PID is 7717
signal num: 8
signal num: 8
signal num: 8
......
1
2
3
4
5
6
7
8
9
10
11
12
$ ./wait --invalid-memory
Father PID is 8085
Child PID is 8086
w: 8086, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 11

$ ./wait --invalid-memory --handle 11
Father PID is 8432
Child PID is 8433
signal num: 11
signal num: 11
......
1
2
3
4
5
6
7
8
9
10
11
$ ./wait --WNOHANG &
Father PID is 9033
Child PID is 9034
w: 0, WIFEXITED: 1, WIFSIGNALED: 0, WIFSTOPPED: 0, WIFCONTINUED: 0
w: 0, WIFEXITED: 1, WIFSIGNALED: 0, WIFSTOPPED: 0, WIFCONTINUED: 0
time: 1577157886
time: 1577157887
......
$ kill 9034
w: 9034, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
$ ./wait --WUNTRACED --WCONTINUED --handle 15 &
[1] 9312
Child PID is 9314
time: 1577158011
Father PID is 9312
time: 1577158013
time: 1577158014
$ kill 9314
signal num: 15
time: 1577158015
time: 1577158016
......
time: 1577158025
time: 1577158026
time: 1577158027
time: 1577158028
$ kill -SIGSTOP 9314
w: 9314, WIFEXITED: 0, WIFSIGNALED: 0, WIFSTOPPED: 1, WIFCONTINUED: 0
stopped by signal 19
$ kill -SIGCONT 9314
w: 9314, WIFEXITED: 0, WIFSIGNALED: 0, WIFSTOPPED: 0, WIFCONTINUED: 1
continued
time: 1577158038
...
time: 1577158041
$ kill -SIGKILL 9314
w: 9314, WIFEXITED: 0, WIFSIGNALED: 1, WIFSTOPPED: 0, WIFCONTINUED: 0
killed by signal 9

消费者连接NSQD或NSQLookupd

消费者可以连接多个NSQLookupd,消费者从每一个NSQLookupd查询拥有需要消费的TopicNSQD,并与查询得到的每一个NSQD建立连接。 每当NSQDTopicChannle出现了变动,NSQD会通知每一个与它相连的NSQLookupd

一条消息的生命周期

一条消息首先被生产者投递到NSQDTopic,然后由Topic.messagePump这个方法分发给TopicChannel

nsq/nsqd/protocol_v2.go中的protocolV2.IOLoop方法一边根据客户端的接收情况把Channel中的消息发给客户端(protocolV2.messagePump),一遍处理来自客户端的指令如REQFIN等。 消息发给客户端之前,首先push到InFlightQueue这个优先队列中,这个队列可以表示正在处理中的消息。 InFlightQueue是最小堆,用于比较的属性是过期时刻。 每当消费者发送表示执行成功的FIN指令给NSQD时,NSQD就会把执行成功的消息从对应ChannelInFlightQueue删除; 如果直到超时还未收到消费者的FIN指令,那么对应的消息会从InFlightQueue删除,然后重新投递给Channel; 如果收到REQ指令,说明对应消息需要重新执行,这条消息会从InFlightQueue删除然后进入DeferredQueue,等到需要被执行的时刻再重新投递给ChannelDeferredQueue也是最小堆,用于比较的属性是消息被执行的时刻。

启动和关闭

DeferredQueue和InFlightQueue

NSQD启动时,通过Metadata生成TopicChannel,并根据TopicChannel找到对应的文件队列。

NSQD关闭时,每一个ChannelmemoryMsgChan的消息会被写到对应的文件队列里,inFlightMessagesdeferredMessages同上。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include <bits/stdc++.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/time.h>

#define DBG(x) cerr << #x << " = " << x << endl

using namespace std;
typedef long long LL;

double cpu_load(double start, double end, double used) {
return used / (end - start) * 100;
}

double cpu_time_used_s() {
rusage usage;
getrusage(RUSAGE_SELF, &usage);
return usage.ru_stime.tv_sec + double(usage.ru_stime.tv_usec) / 1000000 + usage.ru_utime.tv_sec + double(usage.ru_utime.tv_usec) / 1000000;
}

double get_time_s() {
timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + double(tv.tv_usec) / 1000000;
}

int main(int argc, char **argv) {
double start = get_time_s();

srand(time(NULL));
int x = rand();
for (int i = 0; i < 300000000; i++) {
x ^= rand();
x |= rand();
x &= ~rand();
}

sleep(5);

double end = get_time_s();
double real_time_used = end - start;
double cpu_time_used = cpu_time_used_s();

printf("start: %.3fs, end: %.3fs\n"
"real_time_used: %.3f\n"
"cpu_time_used: %.3fs, cpu_load: %.3f%%\n",
start, end, real_time_used, cpu_time_used, cpu_load(start, end, cpu_time_used));

return 0;
}
1
2
3
start: 1576392425.566s, end: 1576392438.229s
real_time_used: 12.663
cpu_time_used: 6.230s, cpu_load: 49.201%

本文参考了Redis源码3.0分支和《Redis设计与实现》。

对象

Redis基于下面提到的底层数据结构创建了一个对象系统,这个系统包括StringListSetHashSorted Set这五种对象,每种对象都用到了至少一种底层数据结构。Redis中的每个对象都由一个redisObject结构表示,该结构中和保存数据有关的三个属性分别是typeencodingptr

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Object types */
#define REDIS_STRING 0
#define REDIS_LIST 1
#define REDIS_SET 2
#define REDIS_ZSET 3
#define REDIS_HASH 4

/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
* is set to one of this fields for this object. */
#define REDIS_ENCODING_RAW 0 /* Raw representation */
#define REDIS_ENCODING_INT 1 /* Encoded as integer */
#define REDIS_ENCODING_HT 2 /* Encoded as hash table */
#define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
#define REDIS_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */
#define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
#define REDIS_ENCODING_INTSET 6 /* Encoded as intset */
#define REDIS_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
#define REDIS_ENCODING_EMBSTR 8 /* Embedded sds string encoding */

typedef struct redisObject {
unsigned type:4;
unsigned encoding:4;
unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock) */
int refcount;
void *ptr;
} robj;

底层数据结构

SDS - Simple Dynamic String

SDS是二进制安全的。

定义

1
2
3
4
5
6
7
typedef char *sds;

struct sdshdr {
unsigned int len;
unsigned int free;
char buf[];
};

API

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
* end of the specified sds string 's'.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
struct sdshdr *sh;
size_t curlen = sdslen(s);

s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
sh = (void*) (s-(sizeof(struct sdshdr)));
memcpy(s+curlen, t, len);
sh->len = curlen+len;
sh->free = sh->free-len;
s[curlen+len] = '\0';
return s;
}

/* Append the specified null termianted C string to the sds string 's'.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscat(sds s, const char *t) {
return sdscatlen(s, t, strlen(t));
}

List

就是大家都学过的链表,方法名也大多顾名思义。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/* Node, List, and Iterator are the only data structures used currently. */

typedef struct listNode {
struct listNode *prev;
struct listNode *next;
void *value;
} listNode;

typedef struct listIter {
listNode *next;
int direction;
} listIter;

typedef struct list {
listNode *head;
listNode *tail;
void *(*dup)(void *ptr);
void (*free)(void *ptr);
int (*match)(void *ptr, void *key);
unsigned long len;
} list;

/* Functions implemented as macros */
#define listLength(l) ((l)->len)
#define listFirst(l) ((l)->head)
#define listLast(l) ((l)->tail)
#define listPrevNode(n) ((n)->prev)
#define listNextNode(n) ((n)->next)
#define listNodeValue(n) ((n)->value)

#define listSetDupMethod(l,m) ((l)->dup = (m))
#define listSetFreeMethod(l,m) ((l)->free = (m))
#define listSetMatchMethod(l,m) ((l)->match = (m))

#define listGetDupMethod(l) ((l)->dup)
#define listGetFree(l) ((l)->free)
#define listGetMatchMethod(l) ((l)->match)

/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
list *listAddNodeHead(list *list, void *value);
list *listAddNodeTail(list *list, void *value);
list *listInsertNode(list *list, listNode *old_node, void *value, int after);
void listDelNode(list *list, listNode *node);
listIter *listGetIterator(list *list, int direction);
listNode *listNext(listIter *iter);
void listReleaseIterator(listIter *iter);
list *listDup(list *orig);
listNode *listSearchKey(list *list, void *key);
listNode *listIndex(list *list, long index);
void listRewind(list *list, listIter *li);
void listRewindTail(list *list, listIter *li);
void listRotate(list *list);

Dict

Dict的核心就是Separate Chaining Hash Table。 随着操作的不断进行,哈希表保存的键值对会逐渐地增多或减少,为了让哈希表的负载因子(USED/BUCKETS)维持在一个合理的范围之内,当哈希表保存的键值对数量太多或太少时,程序需要对哈希表的大小进行相应的扩展或收缩。

定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;
} dictEntry;

typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
dictEntry **table;
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;

typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;

核心方法实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key);

if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}

/* Low level add. This function adds the entry but instead of setting
* a value returns the dictEntry structure to the user, that will make
* sure to fill the value field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned.
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key)
{
int index;
dictEntry *entry;
dictht *ht;

if (dictIsRehashing(d)) _dictRehashStep(d);

/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key)) == -1)
return NULL;

/* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;

/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}

dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;

if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}

Skiplist

跳跃表是一种有序数据结构,它通过在每个节点中维持多个指向其他节点的指针,从而达到快速访问节点的目的。 跳跃表支持平均O(logN)、最坏O(N)复杂度的节点查找,还可以通过顺序性操作来批量处理节点。跳跃表的实现比平衡树更简单。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/* ZSETs use a specialized version of Skiplists */
typedef struct zskiplistNode {
robj *obj;
double score;
struct zskiplistNode *backward;
struct zskiplistLevel {
struct zskiplistNode *forward;
unsigned int span;
} level[];
} zskiplistNode;

typedef struct zskiplist {
struct zskiplistNode *header, *tail;
unsigned long length;
int level;
} zskiplist;

zskiplist *zslCreate(void);
void zslFree(zskiplist *zsl);
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj);
int zslDelete(zskiplist *zsl, double score, robj *obj);
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
unsigned int zsetLength(robj *zobj);
void zsetConvert(robj *zobj, int encoding);
unsigned long zslGetRank(zskiplist *zsl, double score, robj *o);

插入时的核心逻辑:

  1. 找到插入的位置
  2. 随机得到新插入节点的level
  3. 处理为了插入当前节点穿过的指针和未穿过的指针的指向和跨度

删除时的核心逻辑:

  1. 找到删除的位置
  2. 处理要删除的节点穿过的指针和未穿过的指针的指向和跨度
  3. 如果可以,减小跳跃表的level

下面这个题可以使用平衡树来解,这里为了练习使用跳跃表,注意根据题意特殊处理。 SPOJ ORDERSET

其他底层数据结构

其他底层数据结构还包括了压缩列表(Ziplist)和整数集合(Intset)等。

对象的实现

Redis对象通过encoding属性来设定对象所使用的编码,而不是为特定类型的对象关联一种特定的编码。Redis可以根据不同的使用场景来为一个对象设置不同的编码,从而优化对象在某一场景下的效率。Redis对象还会根据不同的条件,从一种编码转换成另一种编码。

不同类型和编码的对象:

类型 编码 对象
REDIS_STRING REDIS_ENCODING_INT 使用整数值实现的字符串对象
REDIS_STRING REDIS_ENCODING_EMBSTR 使用 embstr 编码的简单动态字符串实现的字符串对象
REDIS_STRING REDIS_ENCODING_RAW 使用简单动态字符串实现的字符串对象
REDIS_LIST REDIS_ENCODING_ZIPLIST 使用压缩列表实现的列表对象
REDIS_LIST REDIS_ENCODING_LINKEDLIST 使用双端链表实现的列表对象
REDIS_HASH REDIS_ENCODING_ZIPLIST 使用压缩列表实现的哈希对象
REDIS_HASH REDIS_ENCODING_HT 使用字典实现的哈希对象
REDIS_SET REDIS_ENCODING_INTSET 使用整数集合实现的集合对象
REDIS_SET REDIS_ENCODING_HT 使用字典实现的集合对象
REDIS_ZSET REDIS_ENCODING_ZIPLIST 使用压缩列表实现的有序集合对象
REDIS_ZSET REDIS_ENCODING_SKIPLIST 使用跳跃表和字典实现的有序集合对象

Set

Set的编码可以是intsethashtablehashtable编码的集合对象使用字典作为底层实现,字典的每个键都是字符串对象,字典的值则全部为NULL。

Sorted Set

https://news.ycombinator.com/item?id=1171423 Sorted Set的编码可以是ziplistskiplist

1
2
3
4
typedef struct zset {
dict *dict;
zskiplist *zsl;
} zset;
底层数据结构编码为skiplist时,redisObject.ptr指向zset类型。 skiplist本身不支持通过key查value,zset使用dict字典为有序集合维护了一个从成员到分值的映射。

协议格式

The way RESP is used in Redis as a request-response protocol is the following:

  • Clients send commands to a Redis server as a RESP Array of Bulk Strings.
  • The server replies with one of the RESP types according to the command implementation.

In RESP, the type of some data depends on the first byte:

  • For Simple Strings the first byte of the reply is "+"
  • For Errors the first byte of the reply is "-"
  • For Integers the first byte of the reply is ":"
  • For Bulk Strings the first byte of the reply is "$"
  • For Arrays the first byte of the reply is "*"

引用自https://redis.io/topics/protocol


建连与执行命令

Redis服务端通过使用selectpoll等I/O多路复用系统调用来实现事件驱动模型。 Redis中的事件分为两类,一类被称为定时事件,如定期执行serverCron来处理过期逻辑、保存RDB、保存AOF等;另一类被称为文件事件,I/O多路复用函数在处理文件事件时使用,当与文件事件相关联的文件描述符ready即可读或可写时,select等函数返回,Redis使用与ready的文件描述符绑定的函数来处理相应的事件。 与文件描述符绑定的函数可分为三类:

  1. AcceptHandler,用来accept客户端的连接,并且把accept后得到的文件描述符设置为非阻塞O_NONBLOCK
  2. ReadHandler,用来read客户端发过来的数据,并且每当读到一条完整命令后就执行并把结果写到服务端的与该客户端相对应的缓冲区
  3. WriteHandler,用来把服务端的与客户端对应的缓冲区中的数据write给客户端

ReadHandler

实现逻辑参照src/networking.c:readQueryFromClient。 每当fd可读,尝试读REDIS_IOBUF_LEN=1024*16这么多字节的数据到缓冲区,如果nread=0则断开与fd表示的客户端的链接。 当读到的数据不为空时,持续从缓冲区中尝试解析命令并执行,直到缓冲区已经没有完整命令。

WriteHandler

实现逻辑参照src/networking.c:sendReplyToClient。 每当fd可写,循环把与fd对应的返回结果缓冲区中的全部数据都发给对应的客户端,直到全部发完或该次事件处理的totwritten > REDIS_MAX_WRITE_PER_EVENT=1024*64write调用出错。 当write调用出错时,若错误EAGAIN为,则下一轮事件循环再尝试写这个fd;若为其他错误,则关闭与fd对应的客户端连接。


Pipeline

A Request/Response server can be implemented so that it is able to process new requests even if the client didn't already read the old responses. This way it is possible to send multiple commands to the server without waiting for the replies at all, and finally read the replies in a single step.

This is called pipelining, and is a technique widely in use since many decades. For instance many POP3 protocol implementations already supported this feature, dramatically speeding up the process of downloading new emails from the server.

Redis has supported pipelining since the very early days, so whatever version you are running, you can use pipelining with Redis. This is an example using the raw netcat utility:

1
2
3
4
$ (printf "PING\r\nPING\r\nPING\r\n"; sleep 1) | nc localhost 6379
+PONG
+PONG
+PONG

引用自https://redis.io/topics/pipelining#redis-pipelining

按照官方文档,所谓pipeline只是客户端一起把多条命令发给服务端,然后一起读返回结果,而不是每发一条命令读一次返回结果。 在Redis服务端代码中并没有对所谓pipeline的特殊处理,服务端也只是每当能解析出一条命令就执行,然后把返回结果写到缓冲区里,然后在下次事件循环时按照WriteHandler中的逻辑把返回结果write给客户端。

命令行实现pipeline调用

按照官方文档,下面的命令即实现了一次pipeline调用

1
2
3
4
5
6
7
8
$ (printf '*2\r\n$3\r\nget\r\n$5\r\nhello\r\n*3\r\n$3\r\nset\r\n$5\r\nhello\r\n$5\r\nworld\r\n*2\r\n$3\r\nget\r\n$5\r\nhello\r\n*3\r\n$3\r\nset\r\n$8\r\n1 plus 1\r\n$1\r\n2\r\n*2\r\n$3\r\nget\r\n$8\r\n1 plus 1\r\n'; sleep 1) | nc localhost 6379
$-1
+OK
$5
world
+OK
$1
2
修改Redis代码加一些日志后,Redis服务端的输出为:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
Poll and handle: START
Fd: 5 is readable
13021:M 12 Dec 17:44:44.657 - Accepted 127.0.0.1:57151
Create file event on fd: 6, mask: readable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:44:44.657 - readQueryFromClient fd: 6, nread: 144
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is writeable
13021:M 12 Dec 17:44:44.658 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:44:45.638 - readQueryFromClient fd: 6, nread: 0
13021:M 12 Dec 17:44:45.638 - Client closed connection
Delete file event on fd: 6, mask: readable
Poll and handle: END
Wireshark抓包的结果: request response

Python实现pipeline调用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# coding: utf-8

import redis


def main():
client = redis.Redis(host='localhost', port=6379)
pipe = client.pipeline(transaction=False)
for i in range(16 * 1024):
pipe.set(i, i)
pipe.execute()


if __name__ == '__main__':
main()

Redis日志:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
Poll and handle: START
Fd: 5 is readable
13021:M 12 Dec 17:47:02.791 - Accepted 127.0.0.1:57168
Create file event on fd: 6, mask: readable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.001 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.003 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.004 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.004 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.005 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.005 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.006 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.007 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.008 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.008 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.010 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.011 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.011 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.012 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.013 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.013 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.014 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.015 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.016 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.017 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.017 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.018 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.018 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.019 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.019 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.021 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.022 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.022 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.024 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.025 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.025 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.026 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.028 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.028 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.029 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.033 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.033 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.034 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.035 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.035 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.037 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.038 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.038 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.039 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.040 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.040 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.042 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.042 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.042 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.043 - readQueryFromClient fd: 6, nread: 10548
Fd: 6 is writeable
13021:M 12 Dec 17:47:03.044 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13021:M 12 Dec 17:47:03.108 - readQueryFromClient fd: 6, nread: 0
13021:M 12 Dec 17:47:03.108 - Client closed connection
Delete file event on fd: 6, mask: readable
Poll and handle: END

包含大量命令的pipeline

把上面python代码range的参数改为16 * 1024 * 100,出现了13429:M 12 Dec 18:00:18.557 - sendReplyToClient EAGAIN, fd: 6这种日志。


多个客户端同时执行Pipeline会发生什么

range参数改为16 * 1024 * 10,命令行执行python main.py & python main.py。 Redis日志:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
Poll and handle: START
Fd: 5 is readable
13596:M 12 Dec 18:05:49.267 - Accepted 127.0.0.1:57368
Create file event on fd: 6, mask: readable
Poll and handle: END
Poll and handle: START
Fd: 5 is readable
13596:M 12 Dec 18:05:49.269 - Accepted 127.0.0.1:57369
Create file event on fd: 7, mask: readable
Poll and handle: END
13596:M 12 Dec 18:05:53.103 - 2 clients connected (0 slaves), 993008 bytes in use
13596:M 12 Dec 18:05:58.237 - 2 clients connected (0 slaves), 993008 bytes in use
13596:M 12 Dec 18:06:03.371 - 2 clients connected (0 slaves), 993008 bytes in use
13596:M 12 Dec 18:06:08.513 - 2 clients connected (0 slaves), 993008 bytes in use
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.131 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.134 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13596:M 12 Dec 18:06:09.134 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.134 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.135 - readQueryFromClient fd: 6, nread: 16384
Fd: 6 is writeable
13596:M 12 Dec 18:06:09.136 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
................................
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.283 - readQueryFromClient fd: 6, nread: 16384
Fd: 7 is readable
13596:M 12 Dec 18:06:09.284 - readQueryFromClient fd: 7, nread: 16384
Fd: 6 is writeable
13596:M 12 Dec 18:06:09.285 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Fd: 7 is writeable
13596:M 12 Dec 18:06:09.285 - try sendReplyToClient to fd: 7
Delete file event on fd: 7, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.285 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Fd: 7 is readable
13596:M 12 Dec 18:06:09.285 - readQueryFromClient fd: 7, nread: 16384
Create file event on fd: 7, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.286 - readQueryFromClient fd: 6, nread: 16384
Fd: 7 is readable
13596:M 12 Dec 18:06:09.287 - readQueryFromClient fd: 7, nread: 16384
Fd: 6 is writeable
13596:M 12 Dec 18:06:09.287 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Fd: 7 is writeable
13596:M 12 Dec 18:06:09.287 - try sendReplyToClient to fd: 7
Delete file event on fd: 7, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.288 - readQueryFromClient fd: 6, nread: 16384
Create file event on fd: 6, mask: writeable
Fd: 7 is readable
13596:M 12 Dec 18:06:09.288 - readQueryFromClient fd: 7, nread: 16384
Create file event on fd: 7, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:09.289 - readQueryFromClient fd: 6, nread: 16384
Fd: 7 is readable
13596:M 12 Dec 18:06:09.290 - readQueryFromClient fd: 7, nread: 16384
Fd: 6 is writeable
13596:M 12 Dec 18:06:09.291 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Fd: 7 is writeable
13596:M 12 Dec 18:06:09.291 - try sendReplyToClient to fd: 7
Delete file event on fd: 7, mask: writeable
Poll and handle: END
................................
Poll and handle: START
Fd: 6 is writeable
13596:M 12 Dec 18:06:19.984 - try sendReplyToClient to fd: 6
13596:M 12 Dec 18:06:19.984 - sendReplyToClient EAGAIN, fd: 6
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:20.121 - try sendReplyToClient to fd: 7
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:20.122 - try sendReplyToClient to fd: 7
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:20.122 - try sendReplyToClient to fd: 7
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:20.122 - try sendReplyToClient to fd: 7
13596:M 12 Dec 18:06:20.122 - sendReplyToClient EAGAIN, fd: 7
Poll and handle: END
................................
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:23.854 - try sendReplyToClient to fd: 7
13596:M 12 Dec 18:06:23.854 - sendReplyToClient EAGAIN, fd: 7
Poll and handle: END
Poll and handle: START
Fd: 6 is writeable
13596:M 12 Dec 18:06:24.085 - try sendReplyToClient to fd: 6
Poll and handle: END
Poll and handle: START
Fd: 6 is writeable
13596:M 12 Dec 18:06:24.085 - try sendReplyToClient to fd: 6
Delete file event on fd: 6, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:24.192 - try sendReplyToClient to fd: 7
Poll and handle: END
Poll and handle: START
Fd: 7 is writeable
13596:M 12 Dec 18:06:24.192 - try sendReplyToClient to fd: 7
Delete file event on fd: 7, mask: writeable
Poll and handle: END
Poll and handle: START
Fd: 6 is readable
13596:M 12 Dec 18:06:26.373 - readQueryFromClient fd: 6, nread: 0
13596:M 12 Dec 18:06:26.373 - Client closed connection
Delete file event on fd: 6, mask: readable
Poll and handle: END
Poll and handle: START
Fd: 7 is readable
13596:M 12 Dec 18:06:26.510 - readQueryFromClient fd: 7, nread: 0
13596:M 12 Dec 18:06:26.510 - Client closed connection
Delete file event on fd: 7, mask: readable
Poll and handle: END

命令具体实现在redis-3.0/src/redis.c:genRedisInfoString

Memory

  • used_memory是redis通过在每次执行mallocfree等函数的时候维护定义在src/zmalloc.c中的used_memory变量实现的
  • used_memory_rss在linux中是通过读/proc/{pid}/stat这个文件的第24个字段rss得到number of pages the process in real memory然后再乘以sysconf(_SC_PAGESIZE)实现的。sysconf(_SC_PAGESIZE)表示Size of a page in bytes。
  • used_memory_peakRecord the max memory used since the server was started.
  • mem_fragmentation_ratioFragmentation = RSS / allocated-bytes,allocated-bytes即为used_memory

CPU

  • used_cpu_user调用getrusage得到的ru_utime
  • used_cpu_sys调用getrusage得到的ru_stime

实现参考:

  • 《UNIX环境高级编程》16.3.3 地址查询
  • 《UNIX系统编程手册 下》59.10.1
  • man getaddrinfo

addr:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#include <cstdio>
#include <cstring>

#include <iostream>
#include <string>

#include <sys/socket.h>
#include <netdb.h>

using namespace std;

string getnameinfo_str(const sockaddr *addr, socklen_t addrlen, int flags) {
char host[NI_MAXHOST];
char service[NI_MAXSERV];
int ret = getnameinfo(addr, addrlen, host, sizeof(host), service, sizeof(service), flags);
if (ret != 0) {
fprintf(stderr, "getnameinfo failed, ret: %d, str: %s\n", ret, gai_strerror(ret));
}
return host + string(":") + service;
}

void work(const string &host, const string &service, int hints_flags, int getaddrinfo_flags) {
printf("host: %s, service: %s, hints_flags: %d, getaddrinfo_flags: %d\n", host.c_str(), service.c_str(), hints_flags, getaddrinfo_flags);
addrinfo hints;
addrinfo *result;
memset(&hints, 0, sizeof(hints));

hints.ai_socktype = SOCK_STREAM;
hints.ai_family = AF_UNSPEC;
hints.ai_flags = hints_flags;

int ret = getaddrinfo(host == "-" ? NULL : host.c_str(), service == "-" ? NULL : service.c_str(), &hints, &result);
if (ret != 0) {
fprintf(stderr, "getaddrinfo, host: %s, service: %s, ret: %d, str: %s\n", host.c_str(), service.c_str(), ret, gai_strerror(ret));
return;
}
for (addrinfo *p = result; p != NULL; p = p->ai_next) {
printf("addr: %s, flags: %d, family: %d, socktype: %d, procotol: %d\n",
getnameinfo_str(p->ai_addr, p->ai_addrlen, getaddrinfo_flags).c_str(), p->ai_flags, p->ai_family, p->ai_socktype, p->ai_protocol);
if (p == result && (hints_flags & AI_CANONNAME)) {
printf("canonname: %s\n", p->ai_canonname);
}
}

freeaddrinfo(result);

puts("-----------");
}

int main(int argc, char **argv) {
string host, service;
int hints_flags, getaddrinfo_flags;
while (cin >> host >> service >> hints_flags >> getaddrinfo_flags) {
work(host, service, hints_flags, getaddrinfo_flags);
}

work("-", "https", AI_PASSIVE, 0);
work("-", "https", 0, 0);
work("-", "https", AI_PASSIVE | AI_CANONNAME, 0);

work("127.0.0.1", "https", AI_PASSIVE, 0);
work("127.0.0.1", "https", AI_PASSIVE | AI_CANONNAME, 0);
work("127.0.0.1", "https", AI_PASSIVE | AI_CANONNAME, NI_NUMERICHOST | NI_NUMERICSERV);

work("localhost", "http", AI_CANONNAME, 0);
work("localhost", "http", AI_CANONNAME, NI_NUMERICHOST | NI_NUMERICSERV);

work("baidu.com", "http", AI_CANONNAME, 0);
work("baidu.com", "http", AI_CANONNAME, NI_NUMERICHOST | NI_NUMERICSERV);

work("google.com", "http", AI_CANONNAME, 0);
work("google.com", "http", AI_CANONNAME, NI_NUMERICHOST | NI_NUMERICSERV);

return 0;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
$ ./addr
host: -, service: https, hints_flags: 1, getaddrinfo_flags: 0
addr: :::https, flags: 0, family: 30, socktype: 1, procotol: 6
addr: 0.0.0.0:https, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: -, service: https, hints_flags: 0, getaddrinfo_flags: 0
addr: localhost:https, flags: 0, family: 30, socktype: 1, procotol: 6
addr: localhost:https, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: -, service: https, hints_flags: 3, getaddrinfo_flags: 0
addr: :::https, flags: 0, family: 30, socktype: 1, procotol: 6
canonname: localhost
addr: 0.0.0.0:https, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: 127.0.0.1, service: https, hints_flags: 1, getaddrinfo_flags: 0
addr: localhost:https, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: 127.0.0.1, service: https, hints_flags: 3, getaddrinfo_flags: 0
addr: localhost:https, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: (null)
-----------
host: 127.0.0.1, service: https, hints_flags: 3, getaddrinfo_flags: 10
addr: 127.0.0.1:443, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: (null)
-----------
host: localhost, service: http, hints_flags: 2, getaddrinfo_flags: 0
addr: localhost:http, flags: 0, family: 30, socktype: 1, procotol: 6
canonname: localhost
addr: localhost:http, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: localhost, service: http, hints_flags: 2, getaddrinfo_flags: 10
addr: ::1:80, flags: 0, family: 30, socktype: 1, procotol: 6
canonname: localhost
addr: 127.0.0.1:80, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: baidu.com, service: http, hints_flags: 2, getaddrinfo_flags: 0
addr: 220.181.38.148:http, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: baidu.com
addr: 39.156.69.79:http, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: baidu.com, service: http, hints_flags: 2, getaddrinfo_flags: 10
addr: 220.181.38.148:80, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: baidu.com
addr: 39.156.69.79:80, flags: 0, family: 2, socktype: 1, procotol: 6
-----------
host: google.com, service: http, hints_flags: 2, getaddrinfo_flags: 0
addr: hkg07s22-in-f110.1e100.net:http, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: google.com
addr: hkg07s22-in-x0e.1e100.net:http, flags: 0, family: 30, socktype: 1, procotol: 6
-----------
host: google.com, service: http, hints_flags: 2, getaddrinfo_flags: 10
addr: 216.58.199.110:80, flags: 0, family: 2, socktype: 1, procotol: 6
canonname: google.com
addr: 2404:6800:4005:803::200e:80, flags: 0, family: 30, socktype: 1, procotol: 6
-----------