2011-04-08 85 views
3

SQL查询计数频率:我有一个表,其中包含如下的项目

id  keywords 
1  cat, dog, man, mouse 
2  man, pen, pencil, eraser 
3  dog, man, friends 
4  dog, leash,...... 

我想根据这个表生成一个像下面这样的表

id cat dog man mouse pen pencil eraser friends leash ...... 
1 1 1 1 1  0 0  0  0  0 
2 0 0 1 0  1 1  1  0  0 
3 0 1 1 0  0 0  0  1  0 

等。

回答

5
-- Builds a 0/1 matrix with one row per id and one column per distinct keyword.
-- Step 1: split every comma-separated keyword list into (id, keyword) rows in #Split.
-- Step 2: build a quoted column list from the distinct keywords.
-- Step 3: run a dynamic PIVOT over #Split using that column list.
;WITH basedata(id, keywords) AS
(
    -- Inline sample data; replace with the real source table.
    SELECT 1, 'cat, dog, man, mouse' UNION ALL
    SELECT 2, 'man, pen, pencil, eraser' UNION ALL
    SELECT 3, 'dog, man, friends' UNION ALL
    SELECT 4, 'dog, leash'
),
cte(id, t, x) AS
(
    SELECT id,
           keywords,
           -- Escape XML metacharacters before turning commas into element
           -- boundaries; otherwise a keyword containing &, < or > makes the
           -- CAST to XML fail. '&' must be replaced first.
           CAST('<foo>'
                + REPLACE(
                      REPLACE(REPLACE(REPLACE(keywords, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                      ',', '</foo><foo>')
                + '</foo>' AS XML)
    FROM basedata
),
split(id, keyword) AS
(
    -- One row per <foo> element, with surrounding spaces removed.
    SELECT id,
           LTRIM(RTRIM(w.value('.', 'nvarchar(max)')))
    FROM cte
         CROSS APPLY x.nodes('//foo') AS word(w)
)
SELECT id,
       keyword
INTO #Split
FROM split
-- Skip empty tokens (e.g. from 'a,,b' or a trailing comma): QUOTENAME('')
-- would give '[]', which is not a valid pivot column name.
WHERE keyword <> '';


DECLARE @ColList nvarchar(max);

-- Concatenate with FOR XML PATH instead of SELECT @v = @v + ..., whose result
-- over multiple rows is not guaranteed. TYPE + .value() undoes XML entitization.
-- QUOTENAME makes each keyword safe to splice into the dynamic statement.
SELECT @ColList = STUFF(
        (SELECT ',' + QUOTENAME(keyword)
         FROM (SELECT DISTINCT keyword FROM #Split) AS k
         ORDER BY keyword
         FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'),
        1, 1, '');

-- With no keywords at all the column list is NULL and there is nothing to pivot.
IF @ColList IS NOT NULL
    EXEC(N'
SELECT *
FROM #Split
PIVOT (COUNT(keyword) FOR keyword IN (' + @ColList + N')) P');

DROP TABLE #Split;

结果为

id   cat   dog   eraser  friends  leash  man   mouse  pen   pencil 
----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- ----------- 
1   1   1   0   0   0   1   1   0   0 
2   0   0   1   0   0   1   0   1   1 
3   0   1   0   1   0   1   0   0   0 
4   0   1   0   0   1   0   0   0   0 
1
-- One bit column per known keyword: 1 when the keyword is present in the row's list.
-- Keywords are matched as whole comma-delimited tokens: spaces are stripped from the
-- list and it is wrapped in commas, so 'cat' does not match inside 'locate'.
-- Because spaces are stripped, write each search keyword without spaces as well.
-- The '...' line is a placeholder for the remaining keyword columns.
SELECT id, 
CAST(CASE WHEN Charindex(',dog,', ',' + REPLACE(keywords, ' ', '') + ',') > 0 THEN 1 ELSE 0 END AS bit) as DOG, 
CAST(CASE WHEN Charindex(',cat,', ',' + REPLACE(keywords, ' ', '') + ',') > 0 THEN 1 ELSE 0 END AS bit) as CAT, 
... 
FROM yourtable 
+1

边缘情况:如果一个关键词可能是另一个词的子串(例如 'cat' 是 'locate' 的子串),该解决方案会产生错误的匹配。 – 2011-04-08 11:52:37

2

您必须使用透视(PIVOT)形式吗?您的最终结果是每个ID的频率——这看起来很奇怪,因为那样单元格中的频率总是只有1。

看看这是否适合你。

样本数据

-- Sample data: one row per id, holding a comma-separated keyword list.
CREATE TABLE basedata
(
    id       int,
    keywords varchar(max)
);

INSERT INTO basedata (id, keywords)
          SELECT 1, 'cat, dog, man, mouse'
UNION ALL SELECT 2, 'man, pen, pencil, eraser'
UNION ALL SELECT 3, 'dog, man, friends'
UNION ALL SELECT 4, 'dog, leash';

查询

-- Counts how often each keyword occurs across all rows of basedata.
-- The recursive CTE peels one token off the front of the list per step:
--   word = text before the first comma, trimmed
--   list = everything after that comma (empty once the last token is taken)
-- Appending ',' inside CHARINDEX guarantees a comma is found for the last token.
;with cte(id, list, word) as (
    -- anchor: first token of every non-empty keyword list
    select id,
        cast(STUFF(keywords,1,CHARINDEX(',',keywords+','),'') as varchar(max)),
        cast(ltrim(rtrim(LEFT(keywords,CHARINDEX(',',keywords+',')-1))) as varchar(max))
    from basedata
    where keywords > ''
    union all
    -- recursive step: next token, until the remainder is empty
    select id,
        STUFF(list,1,CHARINDEX(',',list+','),''),
        ltrim(rtrim(LEFT(list,CHARINDEX(',',list+',')-1)))
    from cte
    where list > ''
)
select word, COUNT(*) as frequency
from cte
-- ignore empty tokens produced by inputs such as 'a,,b' or a leading comma
where word > ''
group by word
order by word
-- one recursion level per keyword: lift the default limit of 100 levels so
-- long keyword lists do not abort the statement
option (maxrecursion 0);

输出

word  frequency 
---------- ----------- 
cat  1 
dog  3 
eraser  1 
friends 1 
leash  1 
man  3 
mouse  1 
pen  1 
pencil  1 
0

如果您使用的是SQL Server 2008,您可以使用全文解析器来拆分字符串:

-- Keyword frequency using the full-text parser (sys.dm_fts_parser) as the splitter.
-- Sample input: one comma-separated keyword list per Id.
Declare @Inputs Table (Id int not null Primary Key, Keywords nvarchar(max)) 
Insert @Inputs(Id, Keywords) Values(1, 'cat, dog, man, mouse') 
Insert @Inputs(Id, Keywords) Values(2, 'man, pen, pencil, eraser') 
Insert @Inputs(Id, Keywords) Values(3, 'dog, man, friends') 
Insert @Inputs(Id, Keywords) Values(4, 'dog, leash') 

Declare @LCID int 
Declare @StopListId int 
Declare @AccentSensitive int 

-- Parser settings: LCID taken from the master database, stoplist id 0,
-- accent sensitivity switched on.
Set @LCID = Cast(DatabasePropertyEx('master','LCID') As int) 
Set @StopListId = 0 
Set @AccentSensitive = 1 

Select S.display_term, Count(*) As Frequency 
From @Inputs As I 
    Cross Apply (
       Select display_term 
       -- Wrap the list in double quotes by hand, doubling embedded quotes as
       -- QUOTENAME(x, '"') would. QUOTENAME itself takes a sysname and returns
       -- NULL for input longer than 128 characters, which silently dropped
       -- long keyword lists.
       -- NOTE(review): the parser's query string is nvarchar(4000); longer
       -- lists are truncated by the cast - confirm whether that matters here.
       From sys.dm_fts_parser(
            Cast(N'"' + Replace(I.Keywords, N'"', N'""') + N'"' As nvarchar(4000))
        , @LCID, @StopListId, @AccentSensitive) 
       ) As S 
Group By S.display_term 

但是,如果您不使用SQL Server 2008,那么您需要一个拆分函数,我在本文末尾给出了一个。这样查询就很简单:

-- Keyword frequency across all rows of @Inputs, using dbo.Split as the splitter.
-- Split values are not trimmed, so trim before grouping; the trimmed keyword
-- column is given an explicit name so consumers can reference it.
Select LTrim(RTrim(S.Value)) As Keyword, Count(*) As Frequency 
From @Inputs As I 
    Cross Apply dbo.Split(I.Keywords, ',') As S 
Group By LTrim(RTrim(S.Value)) 

以下是拆分函数:

-- Inline table-valued function that splits @DelimitedList on @Delimiter.
--
-- Parameters:
--   @DelimitedList  the delimited text to split
--   @Delimiter      separator of up to 2 characters; defaults to ','
--
-- Returns one row per element:
--   Position  character offset of the element's first character within the
--             delimiter-padded list (CorrectedList.List)
--   Value     the element text, not trimmed
--
-- How it works: the list is padded so it starts and ends with the delimiter,
-- a run of integers 1..Len(@DelimitedList) is generated, and every integer
-- that sits on a delimiter marks the start of an element that runs up to the
-- next delimiter.
--
-- Limitations visible in the code: the integers come from
-- sys.objects x sys.columns, so the row count of that cross join caps the
-- input length that can be scanned; Len() ignores trailing spaces.
Create Function [dbo].[Split] 
( 
    @DelimitedList nvarchar(max) 
    , @Delimiter varchar(2) = ',' 
) 
RETURNS TABLE 
AS 
RETURN 
    (
    With CorrectedList As 
     (
     -- Pad with a leading/trailing delimiter only where one is missing.
     Select Case When Left(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End 
      + @DelimitedList 
      + Case When Right(@DelimitedList, DataLength(@Delimiter)) <> @Delimiter Then @Delimiter Else '' End 
      As List 
      , DataLength(@Delimiter) As DelimiterLen 
     ) 
     , Numbers As 
     (
     -- Integers 1..Len(@DelimitedList); a single row when the input is NULL.
     Select TOP (Coalesce(Len(@DelimitedList),1)) Row_Number() Over (Order By c1.object_id) As Value 
     From sys.objects As c1 
      Cross Join sys.columns As c2 
     ) 
    -- Column references use the declared casing (CL.List) so the function also
    -- binds in databases with a case-sensitive collation.
    Select CharIndex(@Delimiter, CL.List, N.Value) + CL.DelimiterLen As Position 
     , Substring (
        CL.List 
        , CharIndex(@Delimiter, CL.List, N.Value) + CL.DelimiterLen  
        , CharIndex(@Delimiter, CL.List, N.Value + 1)       
         - (CharIndex(@Delimiter, CL.List, N.Value) + CL.DelimiterLen) 
        ) As Value 
    From CorrectedList As CL 
     Cross Join Numbers As N 
    -- Keep only the integers that point at the start of a delimiter; the final
    -- (closing) delimiter is excluded by the length check.
    Where N.Value < Len(CL.List) 
     And Substring(CL.List, N.Value, CL.DelimiterLen) = @Delimiter 
    )