2016-10-10 47 views
0

选择数据并分组为直方图——我有如下格式的数据:

-- Sample table holding the raw integer values that will be bucketed into a histogram.
CREATE TABLE data (y int);

-- Sample rows: one value in the lowest bucket, two in the 50000-59999 bucket,
-- and one in the 90000-99999 bucket.
-- The column list is explicit so the insert keeps working if columns are added later.
INSERT INTO data (y)
VALUES
    (1),
    (55555),
    (55555),
    (99999);

我想创建一个直方图,以便大致了解我的数据是如何分布的。我希望得到如下格式的输出:

lowerBoundary upperBoundary y 
------------- ------------- ----------- 
0    9999   1 
10000   19999   0 
20000   29999   0 
30000   39999   0 
40000   49999   0 
50000   59999   2 
60000   69999   0 
70000   79999   0 
80000   89999   0 
90000   99999   1 

回答

0

你必须先创建一张数字表(numbers table),这样计数为 0 的行才能正确显示。然后就可以计算每个“组”的下边界和上边界。

例如,SQL:

-- Histogram of data.y in fixed-width buckets of 10000.
-- Buckets with no matching rows are still returned with y = 0, because the
-- bucket list is generated independently of the data and LEFT JOINed to it.
-- Values of y below 0 fall into no bucket (the first bucket starts at 0).
-- If data is empty, MAX(y) is NULL and the query returns no rows.
WITH digits (n) AS (
    SELECT v.n
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v (n)
),
-- Candidate bucket indexes 0..999. Make this big enough for your data.
numbersTable (n) AS (
    SELECT ones.n + 10 * tens.n + 100 * hundreds.n
    FROM digits AS ones
    CROSS JOIN digits AS tens
    CROSS JOIN digits AS hundreds
),
-- Inclusive lower/upper edge of each bucket: [n*10000, (n+1)*10000 - 1].
boundaries (lowerBoundary, upperBoundary) AS (
    SELECT
        nt.n * 10000,
        (nt.n + 1) * 10000 - 1
    FROM numbersTable AS nt
)
SELECT
    b.lowerBoundary,
    b.upperBoundary,
    COUNT(d.y) AS y  -- counts only matched rows, so empty buckets yield 0
FROM boundaries AS b
-- join with data
LEFT JOIN data AS d
    ON d.y BETWEEN b.lowerBoundary AND b.upperBoundary
-- avoid trailing '0' rows: drop buckets that start above the largest value
WHERE b.lowerBoundary <= (SELECT MAX(m.y) FROM data AS m)
GROUP BY
    b.lowerBoundary,
    b.upperBoundary
ORDER BY b.lowerBoundary;

Click here to run this script at SQL-Fiddle

0

另一种选择......

我用表值函数(TVF)来生成动态范围。由于它是单语句(内联)函数,速度非常快。此外,如果您不能使用 UDF,也可以轻松地将该逻辑移植到 CTE 或子查询中。

-- Histogram of data.y over the spans produced by [dbo].[udf-Range-Number-Span].
-- Each span is treated as half-open: RetVal1 <= y < RetVal2.
-- Spans with no matching rows are kept by the LEFT JOIN and reported with y = 0.
SELECT
    A.RetVal1,
    A.RetVal2,
    COUNT(B.y) AS y  -- COUNT skips the NULLs produced for unmatched spans
FROM [dbo].[udf-Range-Number-Span](0, 100000, 10000) AS A
LEFT JOIN data AS B
    ON  B.y >= A.RetVal1
    AND B.y <  A.RetVal2
GROUP BY
    A.RetVal1,
    A.RetVal2
-- Without an explicit ORDER BY the row order is not guaranteed.
ORDER BY A.RetVal1;

返回

RetVal1  RetVaL2  y 
0.00  10000.00 1 
10000.00 20000.00 0 
20000.00 30000.00 0 
30000.00 40000.00 0 
40000.00 50000.00 0 
50000.00 60000.00 2 
60000.00 70000.00 0 
70000.00 80000.00 0 
80000.00 90000.00 0 
90000.00 100000.00 1 

如果需要

-- Inline table-valued function that splits the range starting at @R1 into
-- consecutive spans of width @Incr, up to @R2.
--
-- Parameters:
--   @R1   - lower edge of the first span
--   @R2   - upper end of the overall range
--   @Incr - width of each span
--
-- Returns one row per span:
--   RetSeq  - 1-based sequence number of the span
--   RetVal1 - lower edge of the span: @R1 + (RetSeq - 1) * @Incr
--   RetVal2 - upper edge of the span: RetVal1 + @Incr
--
-- The first span is emitted unconditionally; the remaining spans come from a
-- generated number sequence limited to M = CAST((@R2 - @R1) / @Incr AS int).
--
-- Max 100 million observations (cte1 has 10 rows, cross joined 8 times).
-- Example: Select * from [dbo].[udf-Range-Number-Span](1,4,.5)
CREATE FUNCTION [dbo].[udf-Range-Number-Span] (@R1 money, @R2 money, @Incr money)
RETURNS TABLE
AS
RETURN (
    WITH cte0 (M) AS (
        -- Number of spans that fit between @R1 and @R2.
        SELECT CAST((@R2 - @R1) / @Incr AS int)
    ),
    cte1 (N) AS (
        -- Ten constant rows used as the seed for the number generator.
        SELECT 1
        FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N (N)
    ),
    cte2 (N) AS (
        -- Sequence 1..M; ORDER BY (SELECT NULL) because no particular order is needed.
        SELECT TOP (SELECT M FROM cte0)
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM cte1 AS a
        CROSS JOIN cte1 AS b
        CROSS JOIN cte1 AS c
        CROSS JOIN cte1 AS d
        CROSS JOIN cte1 AS e
        CROSS JOIN cte1 AS f
        CROSS JOIN cte1 AS g
        CROSS JOIN cte1 AS h
    )
    -- First span.
    SELECT
        RetSeq  = 1,
        RetVal1 = @R1,
        RetVal2 = @R1 + @Incr
    UNION ALL
    -- Spans 2..M, each shifted by N increments from @R1.
    SELECT
        nums.N + 1,
        (nums.N * @Incr) + @R1,
        ((nums.N * @Incr) + @R1) + @Incr
    FROM cte2 AS nums
    CROSS JOIN cte0
    WHERE nums.N < cte0.M
);
的UDF