2012-02-05 53 views
1

我在查询我们的一个 MS SQL Server 数据库时遇到了问题(SQL Server 递归查询问题)。为了简洁起见,下面的表和视图都经过了简化,但应该足以说明问题。

每一层的得分都是其直接子级得分的平均值。这些视图对于固定的结构已经足够,但位置层次结构目前以嵌套集(nested set)的形式存储,情况就复杂得多,因为该层次结构由用户定义,层数并不固定。

我尝试用递归 CTE 来解决这个问题,但递归 CTE 不允许在递归部分进行聚合。

-- Location hierarchy. Each row carries both a nested-set interval (lft/rgt)
-- and an adjacency-list pointer (parent_id, NULL on the sample root row).
CREATE TABLE dbo.locations_main
(
    id        smallint  NOT NULL CONSTRAINT PK_locations_main PRIMARY KEY CLUSTERED,
    name      nchar(50) NOT NULL,
    lft       smallint  NOT NULL,
    rgt       smallint  NOT NULL,
    parent_id smallint  NULL
)
GO

-- Sample tree: one root, two mid-level locations, five leaf locations
-- (a leaf is a row where rgt - lft = 1).
INSERT INTO dbo.locations_main (id, name, lft, rgt, parent_id)
VALUES
    (1, 'location 1', 1, 16, NULL),
    (2, 'location 1-1', 2, 9, 1),
    (3, 'location 1-1-1', 3, 4, 2),
    (4, 'location 1-1-2', 5, 6, 2),
    (5, 'location 1-1-3', 7, 8, 2),
    (7, 'location 1-2', 10, 15, 1),
    (8, 'location 1-2-1', 11, 12, 7),
    (9, 'location 1-2-2', 13, 14, 7)
GO

-- Outcomes; each one is attached to a location through location_id.
CREATE TABLE dbo.outcomes
(
    id          smallint     NOT NULL CONSTRAINT PK_outcomes PRIMARY KEY CLUSTERED,
    location_id smallint     NOT NULL,
    name        nvarchar(50) NOT NULL
)
GO

-- One outcome for each of the five leaf locations in the sample tree.
INSERT INTO dbo.outcomes (id, location_id, name)
VALUES
    (1, 3, 'outcome 1'),
    (2, 4, 'outcome 2'),
    (3, 5, 'outcome 3'),
    (4, 8, 'outcome 4'),
    (5, 9, 'outcome 5')
GO

-- Prompts; each one is attached to an outcome through outcome_id.
CREATE TABLE dbo.prompts
(
    id         smallint     NOT NULL CONSTRAINT PK_prompts PRIMARY KEY CLUSTERED,
    outcome_id smallint     NOT NULL,
    name       nvarchar(50) NOT NULL
)
GO

-- One prompt per sample outcome.
INSERT INTO dbo.prompts (id, outcome_id, name)
VALUES
    (1, 1, 'prompt 1'),
    (2, 2, 'prompt 2'),
    (3, 3, 'prompt 3'),
    (4, 4, 'prompt 4'),
    (5, 5, 'prompt 5')
GO

-- Subprompts carry the raw scores; each one is attached to a prompt through
-- prompt_id.
CREATE TABLE dbo.subprompts
(
    id        smallint     NOT NULL CONSTRAINT PK_subprompts PRIMARY KEY CLUSTERED,
    prompt_id smallint     NOT NULL,
    name      nvarchar(50) NOT NULL,
    score     smallint     NOT NULL
)
GO

-- Two scored subprompts per sample prompt.
INSERT INTO dbo.subprompts (id, prompt_id, name, score)
VALUES
    (1, 1, 'subprompt 1', 1),
    (2, 1, 'subprompt 2', 1),
    (3, 2, 'subprompt 3', 1),
    (4, 2, 'subprompt 4', 3),
    (5, 3, 'subprompt 5', 2),
    (6, 3, 'subprompt 6', 4),
    (7, 4, 'subprompt 7', 1),
    (8, 4, 'subprompt 8', 5),
    (9, 5, 'subprompt 9', 3),
    (10, 5, 'subprompt 10', 3)
GO

-- Scores each prompt as the average of its subprompts' scores. The LEFT JOIN
-- keeps prompts that have no subprompts; their score is NULL.
-- NOTE(review): subprompts.score is smallint, so AVG presumably yields a
-- truncated integer rather than a fractional average -- confirm this is wanted.
CREATE VIEW dbo.vw_prompts
AS
SELECT
    p.id,
    p.outcome_id,
    p.name,
    AVG(sp.score) AS score
FROM dbo.prompts AS p
LEFT JOIN dbo.subprompts AS sp
    ON sp.prompt_id = p.id
GROUP BY
    p.id,
    p.outcome_id,
    p.name
GO

-- Scores each outcome as the average of the scores of the prompts that belong
-- to it. The LEFT JOIN keeps outcomes that have no prompts; their score is NULL.
--
-- Prompts are matched through their outcome_id foreign key. Joining on the
-- prompt's own id only produces the right rows when prompt ids happen to equal
-- outcome ids, as they do in the sample data.
CREATE VIEW dbo.vw_outcomes
AS
SELECT
    o.id,
    o.location_id,
    o.name,
    AVG(p.score) AS score
FROM dbo.outcomes AS o
LEFT JOIN dbo.vw_prompts AS p
    ON p.outcome_id = o.id
GROUP BY
    o.id,
    o.location_id,
    o.name
GO

下面的查询会检索所有位置,但它计算的是各位置下所有叶节点的平均值,而不是该位置直接子级的平均值:

-- For every location, averages the outcome scores found anywhere inside its
-- nested-set interval (the location itself included). Every outcome in the
-- subtree therefore carries equal weight, whatever its depth.
SELECT
    ancestor.name,
    AVG(CAST(scored.score AS FLOAT))
FROM locations_main AS ancestor
LEFT JOIN locations_main AS descendant
    ON descendant.lft >= ancestor.lft
    AND descendant.rgt <= ancestor.rgt
INNER JOIN vw_outcomes AS scored
    ON scored.location_id = descendant.id
GROUP BY ancestor.name

返回结果:

location 1  2.4 
location 1-1  2 
location 1-1-1 1 
location 1-1-2 2 
location 1-1-3 3 
location 1-2  3 
location 1-2-1 3 
location 1-2-2 3 

“位置 1”得到的是“位置 1-1-1”、“位置 1-1-2”、“位置 1-1-3”、“位置 1-2-1”和“位置 1-2-2”的平均值,即 (1 + 2 + 3 + 3 + 3) / 5 = 2.4,而不是“位置 1-1”和“位置 1-2”的平均值,即 (2 + 3) / 2 = 2.5。

我尝试用 CTE 来解决这个问题,但又遇到了一个问题:CTE 的递归部分不允许使用 GROUP BY 和聚合函数:

-- Attempted bottom-up rollup of location scores with a recursive CTE.
-- As the accompanying text explains, this attempt does not work: the recursive
-- member below relies on GROUP BY and AVG, which are not permitted in the
-- recursive part of a CTE.
WITH location_scores 
AS 
(
-- Anchor member definition 
-- Get score for all leaf node locations 
-- (a leaf is a row whose nested-set interval is empty: rgt - lft = 1)
SELECT locations_main.id, locations_main.name, locations_main.parent_id, AVG(CAST(vw_outcomes.score AS FLOAT)) AS score 
FROM locations_main 
INNER JOIN vw_outcomes 
    ON locations_main.id = vw_outcomes.location_id 
WHERE locations_main.rgt - locations_main.lft = 1 
GROUP BY locations_main.id, locations_main.name, locations_main.parent_id 

UNION ALL 

-- Recursive member definition 
-- Rollup through locations parents to build averages 
-- Intended to average the scores of the rows already produced for each
-- location's direct children (matched via location_scores.parent_id).
-- NOTE(review): the INNER JOIN to vw_outcomes keeps only locations that have
-- outcomes of their own; in the sample data only leaf locations do.
SELECT locations_main.id, locations_main.name, locations_main.parent_id, AVG(CAST(location_scores.score AS FLOAT)) AS score 
FROM locations_main 
INNER JOIN vw_outcomes 
    ON locations_main.id = vw_outcomes.location_id 
INNER JOIN location_scores 
    ON locations_main.id = location_scores.parent_id 
GROUP BY locations_main.id, locations_main.name, locations_main.parent_id 

) 
-- Statement that executes the CTE 
SELECT * 
FROM location_scores 

UPDATE:这是我用表值函数所做的尝试。对于这里给出的简单示例,它返回了正确的结果,但我担心它的扩展性。在实际环境中,它要处理的层次结构大约有 15^5 条记录。

-- Rolls location scores up the hierarchy one level at a time.
--
-- Returns one row per scored location: the location's own columns, its score
-- and the pass ([level]) in which the row was produced.
--   * level 1: leaf locations (rgt - lft = 1), scored as the average of their
--     outcome scores from vw_outcomes;
--   * level n + 1: parents of the rows produced at level n, scored as the
--     average of those rows' scores.
-- The result is emptied and returned as-is if the walk runs deeper than the
-- number of locations, which can only happen when parent_id forms a cycle.
--
-- NOTE(review): a parent whose children were produced at different levels
-- (an unbalanced tree) appears to be emitted once per such level, each time
-- averaging only that level's children -- confirm the hierarchy is balanced.
CREATE FUNCTION scores() RETURNS
    @result TABLE
    (
        id        SMALLINT,
        name      NVARCHAR(50),
        lft       SMALLINT,
        rgt       SMALLINT,
        parent_id SMALLINT,
        score     FLOAT,
        [level]   SMALLINT
    ) AS
BEGIN
    DECLARE @level INT
    SET @level = 1

    -- A well-formed tree cannot be deeper than its row count. The bound is also
    -- capped so that @level + 1 always fits the SMALLINT [level] column.
    DECLARE @max_level INT
    SELECT @max_level = COUNT(*) FROM locations_main
    IF @max_level > 32766
        SET @max_level = 32766

    -- Level 1: leaf locations scored straight from their outcomes.
    INSERT INTO @result (id, name, lft, rgt, parent_id, score, [level])
    SELECT
        locations_main.id,
        locations_main.name,
        locations_main.lft,
        locations_main.rgt,
        locations_main.parent_id,
        AVG(CAST(vw_outcomes.score AS FLOAT)) AS score,
        @level AS [level]
    FROM locations_main
    INNER JOIN vw_outcomes
        ON locations_main.id = vw_outcomes.location_id
    WHERE locations_main.rgt - locations_main.lft = 1
    GROUP BY
        locations_main.id,
        locations_main.name,
        locations_main.lft,
        locations_main.rgt,
        locations_main.parent_id

    -- Keep climbing while the level just produced still has rows with a parent.
    -- EXISTS stops at the first such row instead of counting all of them.
    WHILE EXISTS (SELECT * FROM @result WHERE [level] = @level AND parent_id IS NOT NULL)
    BEGIN

        INSERT INTO @result (id, name, lft, rgt, parent_id, score, [level])
        SELECT
            locations_main.id,
            locations_main.name,
            locations_main.lft,
            locations_main.rgt,
            locations_main.parent_id,
            AVG(CAST(res.score AS FLOAT)) AS score,
            (@level + 1) AS [level]
        FROM locations_main
        INNER JOIN @result res
            ON locations_main.id = res.parent_id
            AND res.[level] = @level
        GROUP BY
            locations_main.id,
            locations_main.name,
            locations_main.lft,
            locations_main.rgt,
            locations_main.parent_id

        SET @level = @level + 1

        -- Cycle guard: a function cannot raise an error, so hand back an empty
        -- result rather than looping forever.
        IF @level > @max_level
        BEGIN
            DELETE FROM @result
            RETURN
        END

    END

RETURN
END

如果有人能就这是否是一种合适的方法给出意见,我将不胜感激。

+0

您的问题不是很清楚,因为看不出您的数据是什么样的,也看不出您期望得到什么结果。您能发布一个简单的测试用例来说明您想要实现的目标吗?测试用例不必使用真实的表或数据,只需说明您的需求即可。 – Pondlife 2012-02-06 08:43:41

回答

0

这是我最终写出的表值函数。由于问题本身比较特殊,我不认为它对其他人有多大用处,但为了完整起见,还是把它贴在这里。整个层次结构中共有 920k 条记录,它在我们的开发服务器上 3 秒内即可返回结果。

-- Rolls CQC status scores up the location hierarchy one level at a time.
--
-- Parameters:
--   @location_id  optional; restricts the rollup to the nested-set interval of
--                 that location. NULL (the default) covers every location.
--                 An id that matches no location yields an empty result.
--
-- Returns one row per scored location: the location's own columns, its score
-- and the pass ([level]) in which the row was produced. Level 1 holds the leaf
-- locations (rgt - lft = 1); level n + 1 holds the parents of level n.
--
-- The aggregate is chosen by the 'CQC_ROLLUP_TYPE' row in globals:
--   'AVE'   -> average, rounded to a whole number at every level
--   'WORST' -> minimum
--   other   -> NULL scores
--
-- An empty result is also returned when any location is its own parent, or
-- when the walk goes beyond 10 levels (both are infinite-loop guards).
CREATE FUNCTION cqc_location_template_scores (@location_id INT = NULL) RETURNS
    @result TABLE
    (
        id        SMALLINT,
        name      NVARCHAR(50),
        lft       SMALLINT,
        rgt       SMALLINT,
        parent_id SMALLINT,
        score     FLOAT,
        [level]   SMALLINT
    ) AS
BEGIN
    DECLARE @level INT
    SET @level = 1

    -- Both bounds stay NULL when no location is requested, which disables the
    -- interval filters below.
    DECLARE @lft INT, @rgt INT
    IF (@location_id IS NOT NULL)
    BEGIN
        SELECT @lft = lft, @rgt = rgt FROM locations_main WHERE id = @location_id

        -- lft is NOT NULL in locations_main, so a NULL here means the id was
        -- not found. Return nothing instead of silently scoring everything.
        IF (@lft IS NULL)
            RETURN
    END

    DECLARE @ROLLUP_TYPE VARCHAR(50)
    SELECT @ROLLUP_TYPE = parmvalue FROM globals WHERE parameter = 'CQC_ROLLUP_TYPE'

    -- TEST TO GUARD AGAINST INFINITE LOOP CAUSED BY LOCATIONS_MAIN RECORD BEING ITS OWN PARENT
    IF EXISTS (SELECT * FROM locations_main WHERE id = parent_id)
        RETURN

    -- Level 1: leaf locations scored from the status codes of their outcomes.
    INSERT INTO @result (id, name, lft, rgt, parent_id, score, [level])
    SELECT
        locations_main.id,
        locations_main.name,
        locations_main.lft,
        locations_main.rgt,
        locations_main.parent_id,

        CASE @ROLLUP_TYPE
            WHEN 'AVE' THEN CAST(ROUND(AVG(CAST(CODE_CQC_STATUS.cod_score AS FLOAT)), 0) AS INT)
            WHEN 'WORST' THEN MIN(CODE_CQC_STATUS.cod_score)
            ELSE NULL
        END AS score,

        @level AS [level]
    FROM locations_main
    INNER JOIN cqc_outcomes
        ON locations_main.id = cqc_outcomes.cdo_location
    INNER JOIN CODE_CQC_STATUS
        ON cqc_outcomes.cdo_status = CODE_CQC_STATUS.CODE
    WHERE locations_main.rgt - locations_main.lft = 1
    AND (locations_main.lft >= @lft OR @lft IS NULL)
    AND (locations_main.rgt <= @rgt OR @rgt IS NULL)
    GROUP BY
        locations_main.id,
        locations_main.name,
        locations_main.lft,
        locations_main.rgt,
        locations_main.parent_id

    -- Keep climbing while the level just produced still has rows with a parent.
    -- EXISTS stops at the first such row instead of counting all of them.
    WHILE EXISTS (SELECT * FROM @result WHERE [level] = @level AND parent_id IS NOT NULL)
    BEGIN

        -- Parents of the current level, kept inside the requested interval so
        -- the walk stops at @location_id instead of continuing to the root.
        INSERT INTO @result (id, name, lft, rgt, parent_id, score, [level])
        SELECT
            locations_main.id,
            locations_main.name,
            locations_main.lft,
            locations_main.rgt,
            locations_main.parent_id,

            CASE @ROLLUP_TYPE
                WHEN 'AVE' THEN CAST(ROUND(AVG(CAST(res.score AS FLOAT)), 0) AS INT)
                WHEN 'WORST' THEN MIN(res.score)
                ELSE NULL
            END AS score,

            (@level + 1) AS [level]
        FROM locations_main
        INNER JOIN @result res
            ON locations_main.id = res.parent_id
            AND res.[level] = @level
        WHERE (locations_main.lft >= @lft OR @lft IS NULL)
        AND (locations_main.rgt <= @rgt OR @rgt IS NULL)
        GROUP BY
            locations_main.id,
            locations_main.name,
            locations_main.lft,
            locations_main.rgt,
            locations_main.parent_id

        SET @level = @level + 1

        -- TEST TO GUARD AGAINST INFINITE LOOP
        -- Gives up with an empty result once the walk goes past 10 levels.
        IF (@level > 10)
        BEGIN
            DELETE FROM @result
            RETURN
        END

    END

RETURN
END
GO
0

抱歉,我把 SQL 代码复制到 SSMS 后没能把它格式化。

不过我可以说明的是,SQL Server 中的递归查询由两部分构成:一个锚定部分和一个递归部分。

您可以查看文章 http://www.kodyaz.com/t-sql/sql-server-recursive-query-with-recursive-cte.aspx 中的递归示例。

如果是 GROUP BY 造成了问题,您可以考虑在递归部分之外对查询结果进行分组。

+0

感谢您花时间回复。我不认为CTE选项是可行的,因为在递归部分无法使用GROUP BY。没有GROUP BY,在此查询中使用递归没有任何用处,因为可以使用嵌套集相关查询直接返回层次结构。我希望有人能够提出一种不同的方法来满足我的需求。 – nnichols 2012-02-07 10:30:07