# 数据库
CREATE DATABASE XXX;
DROP DATABASE XXX;
DROP SCHEMA XXX;
# 创建数据表
# 数据和字段都是用了反引号,以防和保留字相同
# int(11),其中 11 代表显示长度,与数值范围无关
# varchar(255),其中 255 代表最大长度
# player_name 的字符集是 uft8,排序规则是 utf8_general_ci,表示大小写不敏感
DROP TABLE IF EXISTS `player`;
CREATE TABLE `player` (
`player_id` int(11) NOT NULL AUTO_INCREMENT,
`team_id` int(11) NOT NULL,
`player_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
`height` float(3, 2) NULL DEFAULT 0.00,
PRIMARY KEY (`player_id`) USING BTREE,
UNIQUE INDEX `player_name`(`player_name`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
# 增加列
ALTER TABLE {tb_name}
ADD [COLUMN] {col_name1} {col_definition1} [FIRST|AFTER existing_column1],
ADD [COLUMN] {col_name2} {col_definition2} [FIRST|AFTER existing_column2],...;
ALTER TABLE app ADD created_by varchar(32) NULL COMMENT '创建人';
# 删除列
ALTER TABLE {tb_name}
DROP COLUMN {col_name1},
DROP COLUMN {col_name2},...;
# 重命名列
ALTER TABLE XXX RENAME COLUMN age to player_age;
# 修改列数据类型
ALTER TABLE XXX MODIFY (player_age float(3,1));
数据表约束
主键约束:不能重复,不能为空,即 UNIQUE + NOT NULL;
外键约束:一个表的外键对应另一个表的主键;
唯一性约束:表明字段在表中的数值是唯一的;
NOT NULL 约束:不能为空;
DEFAULT:
CHECK:用来检查特定字段取值范围的有效性;
数据表的设计原则:
数据表越少越好;
数据表中的字段个数越少越好;
联合主键的字段数越少越好;
主键的利用率越高越好;
VIEW
# 创建视图
CREATE VIEW view_name AS select_statement;
CREATE VIEW player_above_avg_height AS
SELECT player_id, height
FROM player
WHERE height > (SELECT AVG(height) from player);
# 视图可以嵌套
CREATE VIEW player_above_above_avg_height AS
SELECT player_id, height
FROM player
WHERE height > (SELECT AVG(height) from player_above_avg_height);
# 修改视图
ALTER VIEW view_name AS select_statement;
ALTER VIEW player_above_avg_height AS
SELECT player_id, player_name, height
FROM player
WHERE height > (SELECT AVG(height) from player);
# 删除视图
DROP VIEW view_name;
CREATE PROCEDURE sp_name ([proc_parameter[,...]])
BEGIN
需要执行的语句
END
DROP {PROCEDURE | FUNCTION} [IF EXISTS] sp_name
比如计算 1 到 n 的和:
DELIMITER //
CREATE PROCEDURE `add_num`(IN n INT)
BEGIN
DECLARE i INT;
DECLARE sum INT;
SET i = 1;
SET sum = 0;
WHILE i <= n DO
SET sum = sum + i;
SET i = i + 1;
END WHILE;
SELECT sum;
END //
DELIMITER ;
CALL add_num(50);
CREATE PROCEDURE `get_hero_scores`(
OUT max_max_hp FLOAT,
OUT min_max_mp FLOAT,
OUT avg_max_attack FLOAT,
s VARCHAR(255))
BEGIN
SELECT MAX(hp_max), MIN(mp_max), AVG(attack_max)
FROM heros
WHERE role_main = s INTO max_max_hp, min_max_mp, avg_max_attack;
END
CALL get_hero_scores(@max_max_hp, @min_max_mp, @avg_max_attack, '战⼠');
SELECT @max_max_hp, @min_max_mp, @avg_max_attack;
# 查询某些列
SELECT name, create_time FROM tag;
# 查询所有列
SELECT * from tag;
# 取别名
SELECT name AS n, create_time AS ct FROM tag;
# 查询常数列,这一列的取值是我们指定的,不是从数据库中动态获取,适合整合不同的数据源
SELECT '标签' AS `type`, name FROM tag;
# 去除重复行
# DISTINCT 需要放到所有列名前
# DISTINCT 是对后面所有列的组合进行去重
SELECT DISTINCT name FROM tag;
# 排序
# ORDER BY 后面可以有多个列
# ORDER BY 后面可以著名排序规则,ASC(default)、DESC,
# ORDER BY 后面的列可以不在选择列里面
# ORDER BY 通常位于 SELECT 语句的最后
SELECT name FROM tag ORDER BY create_time;
# 返回结果数量
SELECT name FROM tag LIMIT 5;
所以当使用 ORDER BY 时,应尽量使用 IndexSort,可以使用 explain 查看是否使用索引排序。
所以有如下优化建议:
WHERE 和 ORDER BY 都使用索引,WHERE 使用时为了避免全表扫描,ORDER BY 使用时为了避免 FileSort。
若 WHERE 和 ORDER BY 是相同的列,就使用单索引列;若不同,就使用联合索引。
GROUP BY
我们可以对数据进行分组,并使用聚集函数统计每组数据的值。
SELECT COUNT(*) AS num, role_main, role_assist FROM heros GROUP BY role_main, role_assist ORDER BY num DESC;
若 GROUP BY 后面还有 ORDER BY,那么排序实际上是对分组后的数据排序,因为分组已经把多条数据聚合成了一条记录。
HAVING
当我们使用 GROUP BY 创建分组之后,可能需要对分组数据进行过滤,此时就需要使用 HAVING。WHERE 是作用于数据行,而 HAVING 是作用于分组。
SELECT COUNT(*) as num, role_main, role_assist
FROM heros
WHERE hp_max > 6000
GROUP BY role_main, role_assist
HAVING num > 5
ORDER BY num;
The SQL standard requires that HAVING must reference only columns in the GROUP BY clause or columns used in aggregate functions. However, MySQL supports an extension to this behavior, and permits HAVING to refer to columns in the SELECT list and columns in outer subqueries as well.
SQL 中关键字是有顺序的:
SELECT ... FROM ... WHERE ... GROUP BY ... HAVING ... ORDER BY ...
# 查询每个球队中大于平均身高的球员,查询球队的平均身高需要执行多次。
SELECT player_name, height, team_id
FROM player AS a
WHERE height > (SELECT avg(height) FROM player AS b WHERE a.team_id = b.team_id);
子查询还能作为主查询的计算字段:
# 查询每个球队的球员数量
SELECT team_name,
(SELECT count(*) FROM player WHERE player.team_id = team.team_id)
AS player_num
FROM team;
# 查询有出场纪录的球员
SELECT player_id, team_id, player_name
FROM player
WHERE EXISTS(SELECT player_id FROM player_score WHERE player.playe_id = player_score.player_id);
operand comparison_operator ANY (subquery)
operand IN (subquery)
operand comparison_operator SOME (subquery)
operand comparison_operator ALL (subquery)
comparison_operator:
= > < >= <= <> !=
IN: 是否在集合中。
ANY: 与子查询返回的任意一个值比较为真,则整体为真。
ALL: 与子查询返回的所有值比较。
SOME: 是 ANY 的别名,一般用 ANY。
如上文 EXISTS 查询有出场纪录的球员也可写成:
SELECT player_id, team_id, player_name
FROM player
WHERE player_id IN (SELECT player_id FROM player_score);
即 EXISTS 和 IN 的查询结果可等价:
SELECT * FROM A WHERE cc IN (SELECT cc FROM B);
# 先执行子句,在执行外层 SQL
for i in B
for j in A
if j.cc == i.cc then ...
SELECT * FROM A WHERE EXISTS (SELECT cc FROM B WHERE B.cc=A.cc)
# 对 A 表遍历,每条记录执行子句进行判断
for i in A
for j in B
if j.cc == i.cc then ...
查询结果等价,但是查询效率不等价。在上面的例子中,假设 cc 列建立的索引。若 A 表较大,那么 IN 的效率较高;反之,EXISTS 效率较高。
# 查询比(球队 1002 中任意一个球员身高)高的球员
SELECT player_id, player_name, height
FROM player
WHERE height > ANY (SELECT height FROM player WHERE team_id = 1002);
# 查询比(球队 1002 中所有球员身高)都高的球员
SELECT player_id, player_name, height
FROM player
WHERE height > ALL (SELECT height FROM player WHERE team_id = 1002);
JOIN
上文提到 SQL 常用的两个标准是 SQL92 和 SQL99,两个标准的连接查询语法也不一样。
CROSS JOIN
实际上就是求两个表的笛卡尔积,即行数 = A 行数 * B 行数。
# SQL99
SELECT * FROM player CROSS JOIN team;
SELECT * FROM t1 CROSS JOIN t2 CROSS JOIN t3;
# SQL92
SELECT * FROM player, team;
NATURAL JOIN
帮你自动查询两张表中所有相同的字段,然后进行等值连接。
# SQL99
SELECT player_id, team_id, player_name, height, team_name
FROM player
NATURAL JOIN team;
# SQL92
SELECT player_id, a.team_id, player_name, height, team_name
FROM player AS a,
team AS b
WHERE a.team_id = b.team_id;
若表使用了别名,那么查询字段中只能使用别名,不能使用原有的表名。
ON
用来指定连接条件。
# SQL99
SELECT player_id, player.team_id, player_name, height, team_name
FROM player
JOIN team ON player.team_id = team.team_id;
SELECT p.player_name, p.height, h.height_level
FROM player as p
JOIN height_grades as h ON height BETWEEN h.height_lowest AND h.height_highest;
# SQL92
SELECT p.player_name, p.height, h.height_level
FROM player AS p,
height_grades AS h
WHERE p.height BETWEEN h.height_lowest AND h.height_highest;
USING
用于指定数据表中的同名字段进行等值连接。可以简化 JOIN ON 的等值连接。
# SQL99
SELECT player_id, team_id, player_name, height, team_name
FROM player
JOIN team USING (team_id);
LEFT [OUTER] JOIN
左边是主表,需要显示左边表的全部行。
# SQL99
SELECT *
from player
LEFT JOIN team USING (team_id);
# SQL 92, MySQL 不支持
SELECT *
FROM player,
team
where player.team_id = team.team_id(+);
RIGHT [OUTER] JOIN
右边是主表,需要显示右边表的全部行。
# SQL99
SELECT *
FROM player
RIGHT JOIN team USING (team_id);
# SQL 92, MySQL 不支持
SELECT *
FROM player,
team
where player.team_id(+) = team.team_id;