1
-- Fragment 1: per-player aggregate over batting.
-- For each id, sums the per-row ratio hits/ab and exposes it as HAB.
-- NOTE(review): this is a parenthesized derived table aliased b with no
-- enclosing SELECT ... FROM around it, so it is not a runnable statement
-- on its own.
-- NOTE(review): SUM(hits/ab) adds up per-row ratios; if an overall ratio is
-- intended, SUM(hits)/SUM(ab) would give a different result -- confirm intent.
(SELECT
id,
SUM(hits/ab) AS HAB
FROM batting
GROUP BY id
) b
-- Fragment 2: rows of master (alias a) with bmonth >= 0 and a non-NULL
-- bstate, grouped by bmonth and bstate.
-- NOTE(review): id is selected but is neither aggregated nor listed in
-- GROUP BY, and nothing here joins a to the derived table b yet
-- (presumably on id -- confirm).
SELECT id, bmonth, bstate FROM master a
WHERE bmonth >= 0 AND bstate is NOT NULL
GROUP By bmonth,bstate
到目前为止,我只写出了上面这段杂乱的代码,但不知道该如何构造连接(JOIN)并继续往下写,也不知道该从哪里入手。我们应该使用 JOIN 还是子查询?请参考下面的表结构帮忙解答。(问题标题:Hive JOIN 还是子查询的困惑)
看:
-- External table named batting, declared only if it does not already exist.
-- Rows are read as comma-delimited fields from the directory given in
-- LOCATION; id, team and league are strings, every other column is an INT.
CREATE EXTERNAL TABLE IF NOT EXISTS batting (
    id          STRING,
    year        INT,
    team        STRING,
    league      STRING,
    games       INT,
    ab          INT,
    runs        INT,
    hits        INT,
    doubles     INT,
    triples     INT,
    homeruns    INT,
    rbi         INT,
    sb          INT,
    cs          INT,
    walks       INT,
    strikeouts  INT,
    ibb         INT,
    hbp         INT,
    sh          INT,
    sf          INT,
    gidp        INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/home/hduser/hivetest/batting';
-- External table named master, declared only if it does not already exist.
-- Rows are read as comma-delimited fields from the directory given in
-- LOCATION. The id column has the same name and type (STRING) as
-- batting.id.
CREATE EXTERNAL TABLE IF NOT EXISTS master (
    id         STRING,
    byear      INT,
    bmonth     INT,
    bday       INT,
    bcountry   STRING,
    bstate     STRING,
    bcity      STRING,
    dyear      INT,
    dmonth     INT,
    dday       INT,
    dcountry   STRING,
    dstate     STRING,
    dcity      STRING,
    fname      STRING,
    lname      STRING,
    name       STRING,
    weight     INT,
    height     INT,
    bats       STRING,
    throws     STRING,
    debut      STRING,
    finalgame  STRING,
    retro      STRING,
    bbref      STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
LOCATION '/home/hduser/hivetest/master';
嗨,感谢你的回答,但我得到了如下错误:FAILED: ParseException cannot recognize input near '(' '(' 'select' in from source – dedpo
@dedpo 我已经编辑了答案,请现在再试一次。抱歉,我目前无法访问集群 –
这不是预期的输出,但它对于如何执行多连接非常有用 – dedpo