MySql 分表、分区

MySql 分表
比如记录用户操作日志, 这种表数据量将会很大, 那么事先创建N个表(结构一样), 表名如 log_00、log_01、log_02…
目前建议采用第一种, 第二种限制较多

第一种:利用hash算法,将不同用户的操作日志分配至不同的表中,这里创建 100 张表
1
2
3
4
5
6
7
8
9
10
11
function get_hash_table($table, $userId)
{
$str = crc32($userid);
if ($str < 0) {
$hash = '0' . substr(abs($str), 0, 1);
} else {
$hash = substr($str, 0, 2);
}

return $table . '_' . $hash;
}
第二种:利用 merge 存储引擎来分表(表结构需一致且子表存储引擎必须为MyISAM)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
CREATE TABLE `log_00` (
`id` INT(11) NOT NULL,
`user_id` int(11) UNSIGNED NOT NULL,
`desc` varchar(255) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

CREATE TABLE `log_01` (
`id` INT(11) NOT NULL,
`user_id` int(11) UNSIGNED NOT NULL,
`desc` varchar(255) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

#INSERT_METHOD:NO(禁止插入)|FIRST(插入UNION列出的第一个表)|LAST(插入UNION列出的最后一个表)
CREATE TABLE `log` (
`id` INT(11) NOT NULL,
`user_id` int(11) UNSIGNED NOT NULL,
`desc` varchar(255) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MRG_MyISAM UNION=(log_00, log_01) INSERT_METHOD=LAST DEFAULT CHARSET=utf8;

#插入数据,使用 redis 生成全局ID
INSERT INTO `log_00`(`id`, `user_id`, `desc`) VALUES(1, 1, 'A');
INSERT INTO `log_00`(`id`, `user_id`, `desc`) VALUES(2, 2, 'B');
INSERT INTO `log_01`(`id`, `user_id`, `desc`) VALUES(3, 3, 'C');
INSERT INTO `log_01`(`id`, `user_id`, `desc`) VALUES(4, 4, 'D');
INSERT INTO `log`(`id`, `user_id`, `desc`) VALUES(5, 5, 'E');

#查询
SELECT * FROM log;

MySql 分区

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#1. 判断 MySql 是否支持分区, 第一条命令是 5.6 以下版本, 第二条命令是 5.6 及以上版本
show variables like '%partition%'
show plugins

#2. 分区文档 https://dev.mysql.com/doc/refman/5.7/en/partitioning.html

#3. 分区限制, 一个表最多只能有1024个分区

#4. range columns 与 range 的区别:https://dev.mysql.com/doc/refman/5.7/en/partitioning-columns-range.html

#5. 关于 range columnus 多列, 举个例子:
CREATE TABLE `test` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`type` tinyint(1) NOT NULL COMMENT '类型 1:评论,2:点赞',
`content` varchar(16) NOT NULL,
`read_status` tinyint(1) NOT NULL COMMENT '读取状态 0:未读,1:已读',
PRIMARY KEY (`id`,`type`,`read_status`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8
PARTITION BY RANGE COLUMNS(`type`,read_status)
(PARTITION p0 VALUES LESS THAN (1,1) ENGINE = InnoDB,
PARTITION p1 VALUES LESS THAN (1,2) ENGINE = InnoDB,
PARTITION p2 VALUES LESS THAN (2,1) ENGINE = InnoDB,
PARTITION p3 VALUES LESS THAN (2,2) ENGINE = InnoDB);
INSERT INTO `test`(`type`, `content`, `read_status`) VALUES (1, 'A', 0);
INSERT INTO `test`(`type`, `content`, `read_status`) VALUES (1, 'B', 1);
INSERT INTO `test`(`type`, `content`, `read_status`) VALUES (2, 'C', 0);
INSERT INTO `test`(`type`, `content`, `read_status`) VALUES (2, 'D', 1);
#结果是四条数据各占一个分区
SELECT PARTITION_NAME,TABLE_ROWS FROM information_schema.`PARTITIONS` WHERE table_name = 'test';
#因为 range columnus 是基于 元组(列值列表) 之间的比较, 不同于 range 分区的标量值之间的比较,
#比如上面例子的 p0 分区 (1,1), 那么当插入第一条数据时, type = 1, read_status = 0, 由于 type 相等, 所以比较第二个值, 0 < 1, 所以第一条数据分配在 p0 分区, 以此类推

相关例子
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#消息表
CREATE TABLE `message` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`type` tinyint(1) NOT NULL COMMENT '类型 1:评论,2:点赞,3:红包,4:交易',
`content` varchar(16) NOT NULL,
`generate_time` datetime NOT NULL,
PRIMARY KEY (`id`),
KEY `generate_time` (`generate_time`)
) ENGINE=InnoDB AUTO_INCREMENT=5000001 DEFAULT CHARSET=utf8;

#生成随机数
CREATE FUNCTION `generate_random`(n INT) RETURNS varchar(255) CHARSET utf8
BEGIN
DECLARE str varchar(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
DECLARE return_str varchar(255) default '';
DECLARE i INT DEFAULT 0;

WHILE i < n DO
SET return_str = concat(return_str, substring(str , FLOOR(1 + RAND() * 62), 1));
SET i = i + 1;
END WHILE;

return return_str;
END

#生成测试数据
CREATE PROCEDURE `generate_message`(n INT)
BEGIN
DECLARE type INT;
DECLARE i INT DEFAULT 0;
DECLARE date VARCHAR(20);

WHILE i < n DO
SET type = FLOOR(1 + RAND() * 4);
SET i = i + 1;
SET date = FROM_DAYS(TO_DAYS('2018-01-01') + FLOOR(RAND() * 364 + 1));
INSERT INTO message(`type`, `content`, `generate_time`) VALUES(type, generate_random(16), date);
END WHILE;
END

#通过调用存储过程, 生成500W的测试数据
call generate_message(5000000);

#查询测试数据分布情况
SELECT type, DATE_FORMAT(generate_time, '%Y-%m') m, COUNT(1) FROM message GROUP BY type, m;

#运行SQL, 耗时 1.339s
SELECT COUNT(1) FROM message WHERE type = 1 AND generate_time = '2018-07-07 00:00:00';



#消息表分区, 通过表结构可以看出, 以 type 和 generate_time 作为分区字段, 每种类型一年有12个分区, 共48个分区
CREATE TABLE `message_partition` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`type` tinyint(1) NOT NULL COMMENT '类型 1:评论,2:点赞,3:红包,4:交易',
`content` varchar(16) NOT NULL,
`generate_time` datetime NOT NULL,
PRIMARY KEY (`id`,`type`,`generate_time`),
KEY `generate_time` (`generate_time`)
) ENGINE=InnoDB AUTO_INCREMENT=5000001 DEFAULT CHARSET=utf8
PARTITION BY RANGE COLUMNS(`type`,generate_time)
(PARTITION p1d201801 VALUES LESS THAN (1,'2018-02-01') ENGINE = InnoDB,
PARTITION p1d201802 VALUES LESS THAN (1,'2018-03-01') ENGINE = InnoDB,
PARTITION p1d201803 VALUES LESS THAN (1,'2018-04-01') ENGINE = InnoDB,
PARTITION p1d201804 VALUES LESS THAN (1,'2018-05-01') ENGINE = InnoDB,
PARTITION p1d201805 VALUES LESS THAN (1,'2018-06-01') ENGINE = InnoDB,
PARTITION p1d201806 VALUES LESS THAN (1,'2018-07-01') ENGINE = InnoDB,
PARTITION p1d201807 VALUES LESS THAN (1,'2018-08-01') ENGINE = InnoDB,
PARTITION p1d201808 VALUES LESS THAN (1,'2018-09-01') ENGINE = InnoDB,
PARTITION p1d201809 VALUES LESS THAN (1,'2018-10-01') ENGINE = InnoDB,
PARTITION p1d201810 VALUES LESS THAN (1,'2018-11-01') ENGINE = InnoDB,
PARTITION p1d201811 VALUES LESS THAN (1,'2018-12-01') ENGINE = InnoDB,
PARTITION p1d201812 VALUES LESS THAN (1,'2019-01-01') ENGINE = InnoDB,
PARTITION p2d201801 VALUES LESS THAN (2,'2018-02-01') ENGINE = InnoDB,
PARTITION p2d201802 VALUES LESS THAN (2,'2018-03-01') ENGINE = InnoDB,
PARTITION p2d201803 VALUES LESS THAN (2,'2018-04-01') ENGINE = InnoDB,
PARTITION p2d201804 VALUES LESS THAN (2,'2018-05-01') ENGINE = InnoDB,
PARTITION p2d201805 VALUES LESS THAN (2,'2018-06-01') ENGINE = InnoDB,
PARTITION p2d201806 VALUES LESS THAN (2,'2018-07-01') ENGINE = InnoDB,
PARTITION p2d201807 VALUES LESS THAN (2,'2018-08-01') ENGINE = InnoDB,
PARTITION p2d201808 VALUES LESS THAN (2,'2018-09-01') ENGINE = InnoDB,
PARTITION p2d201809 VALUES LESS THAN (2,'2018-10-01') ENGINE = InnoDB,
PARTITION p2d201810 VALUES LESS THAN (2,'2018-11-01') ENGINE = InnoDB,
PARTITION p2d201811 VALUES LESS THAN (2,'2018-12-01') ENGINE = InnoDB,
PARTITION p2d201812 VALUES LESS THAN (2,'2019-01-01') ENGINE = InnoDB,
PARTITION p3d201801 VALUES LESS THAN (3,'2018-02-01') ENGINE = InnoDB,
PARTITION p3d201802 VALUES LESS THAN (3,'2018-03-01') ENGINE = InnoDB,
PARTITION p3d201803 VALUES LESS THAN (3,'2018-04-01') ENGINE = InnoDB,
PARTITION p3d201804 VALUES LESS THAN (3,'2018-05-01') ENGINE = InnoDB,
PARTITION p3d201805 VALUES LESS THAN (3,'2018-06-01') ENGINE = InnoDB,
PARTITION p3d201806 VALUES LESS THAN (3,'2018-07-01') ENGINE = InnoDB,
PARTITION p3d201807 VALUES LESS THAN (3,'2018-08-01') ENGINE = InnoDB,
PARTITION p3d201808 VALUES LESS THAN (3,'2018-09-01') ENGINE = InnoDB,
PARTITION p3d201809 VALUES LESS THAN (3,'2018-10-01') ENGINE = InnoDB,
PARTITION p3d201810 VALUES LESS THAN (3,'2018-11-01') ENGINE = InnoDB,
PARTITION p3d201811 VALUES LESS THAN (3,'2018-12-01') ENGINE = InnoDB,
PARTITION p3d201812 VALUES LESS THAN (3,'2019-01-01') ENGINE = InnoDB,
PARTITION p4d201801 VALUES LESS THAN (4,'2018-02-01') ENGINE = InnoDB,
PARTITION p4d201802 VALUES LESS THAN (4,'2018-03-01') ENGINE = InnoDB,
PARTITION p4d201803 VALUES LESS THAN (4,'2018-04-01') ENGINE = InnoDB,
PARTITION p4d201804 VALUES LESS THAN (4,'2018-05-01') ENGINE = InnoDB,
PARTITION p4d201805 VALUES LESS THAN (4,'2018-06-01') ENGINE = InnoDB,
PARTITION p4d201806 VALUES LESS THAN (4,'2018-07-01') ENGINE = InnoDB,
PARTITION p4d201807 VALUES LESS THAN (4,'2018-08-01') ENGINE = InnoDB,
PARTITION p4d201808 VALUES LESS THAN (4,'2018-09-01') ENGINE = InnoDB,
PARTITION p4d201809 VALUES LESS THAN (4,'2018-10-01') ENGINE = InnoDB,
PARTITION p4d201810 VALUES LESS THAN (4,'2018-11-01') ENGINE = InnoDB,
PARTITION p4d201811 VALUES LESS THAN (4,'2018-12-01') ENGINE = InnoDB,
PARTITION p4d201812 VALUES LESS THAN (4,'2019-01-01') ENGINE = InnoDB);

#将测试数据插入分区表
INSERT INTO message_partition(`type`, `content`, `generate_time`) SELECT type,content,generate_time FROM message;

#查询分区的数据分布情况
SELECT PARTITION_NAME, TABLE_ROWS FROM information_schema.PARTITIONS WHERE table_name = 'message_partition';

#运行SQL, 耗时 0.020s
SELECT COUNT(1) FROM message_partition WHERE type = 1 AND generate_time = '2018-07-07 00:00:00';
相关资料
0%