2014-02-15 46 views
0

SAS:在另一个数据集中像数组一样使用查找数据集。我有一个数据集 contents,保存了学校各项内容的描述:

num  description 
content1 math 
content2 spanish 
content3 geography 
content4 chemistry 
content5 history 
在另一个数据集 students 中,我有数组变量 content1-content5,并用标志 1 来标明每个学生对应的内容。

数据集 students:

name age content1 content2 content3 content4 content5 
BOB 15  1  1  1     1 
BRYA 16 
CARL 15    1       1 
SUE 17      1  1  1 
LOU 15           1 

如果我使用这样的代码:

/* Builds, for every row of STUDENTS, a comma-separated list (allcontents)
   of the names of the content variables whose flag equals 1. */
data students1; 
    set students; 
    /* no variable list is given, so the elements are content1-content5 */
    array content[5]; 
    /* declares allcontents with a 100-character format */
    format allcontents $100.; 
    do i=1 to dim(content); 
    if content[i]=1 then do; 
    /* vname() yields the variable name behind the array element; the new
       name and a comma are placed IN FRONT of the text collected so far */
    /* NOTE(review): cat() does not trim its arguments - confirm the list
       is not padded or truncated by the 100-character allcontents */
    allcontents=cat(vname(content[i]),',',allcontents); 
    end; 
    end; 
    run; 

结果是:

name age content1 content2 content3 content4 content5 allcontents 
BOB 15  1  1  1     1  content1,content2,content3,content5, 
BRYA 16 
CARL 15    1       1  content2,content5, 
SUE 17      1  1  1  content3,content4,content5, 
LOU 15           1  content5 

1) 我希望变量 allcontents 中使用查找表(数据集 contents)里的内容描述,而不是数组 content[1-5] 的变量名。我怎样才能做到这一点?

2) 之后我希望结果按内容描述逐行列出,而不是每个学生一行,像这样:

description name age 
math   BOB 15 
spanish  BOB 15 
geography BOB 15 
history  BOB 15 
spanish  CARL 15 
history  CARL 15 
spanish  SUE 17 
chemistry SUE 17 
history  SUE 17 
history  LOU 15 

这可能吗?

谢谢。

回答

2

首先,从this post获取%create_hash()宏。

使用散列表查找值。

/* Produces the layout asked for in question 2): one output row per
   (student, flagged content) pair, carrying the description looked up
   through a hash object. Requires the %create_hash() macro. */
data students1;
    set students;   /* the terminating semicolon was missing here */
    array content[5];
    /* define num and description before the hash object refers to them */
    format num $32. description $16.;
    if _n_ = 1 then do;
        /* build the lookup only once, on the first iteration;
           presumably key=num, data=description, source data set "contents"
           - confirm against the macro definition */
        %create_hash(cnt,num,description,"contents");
    end;
    do i = 1 to dim(content);
        if content[i] = 1 then do;
            num = vname(content[i]);
            /* clear the previous hit so a failed lookup cannot emit the
               description that belongs to an earlier content */
            call missing(description);
            rc = cnt.find();
            output;
        end;
    end;
    keep description name age;
run;
+0

不错,用哈希表的方法效果很好,谢谢! – user3142441

1

我觉得 proc transpose 很合适。对于问题 2),转置一次就足够了;要重命名变量 content1-content5(即问题 1),转置两次即可。关键在于 proc transpose 的 ID 语句:它会根据 ID 变量对应的取值自动为转置后的变量命名。

下面的代码应该给你想要的答案(虽然名称是按字母顺序排列的,可能与你原来的排序不一样)。

/* original data sets */
/* Lookup table: name of a content variable -> its description. */
data names;
    /* Explicit lengths are required: list input with a bare $ creates
       8-byte character variables, which would truncate "geography" and
       "chemistry" to "geograph" and "chemistr". */
    length num $32 description $16;
    input num $ description $;
    datalines;
content1 math
content2 spanish
content3 geography
content4 chemistry
content5 history
;
run;

/* Student roster: one row per student; content1-content5 hold 1 when the
   student has that content and missing (.) otherwise. */
data students;
    input name $ age content1-content5;
    datalines;
BOB  15 1 1 1 . 1
BRYA 16 . . . . .
CARL 15 . 1 . . 1
SUE  17 . . 1 1 1
LOU  15 . . . . 1
;
run;

/* Reshape STUDENTS to long form: one row per (student, content variable). */

/* BY-group processing below needs the rows ordered by name and age. */
proc sort data=students
          out=tmp_sorted;
    by name age;
run;

/* No VAR statement: every numeric variable outside the BY list
   (content1-content5) is transposed. The source variable name is stored
   in _NAME_ and its value in COL1 (both are the procedure defaults,
   spelled out here). */
proc transpose data=tmp_sorted
               out=tmp_transposed
               name=_NAME_
               prefix=COL;
    by name age;
run;

/* Attach the description that belongs to each content variable. */
/* NUM is selected as well so that rows can be ordered content1..content5
   rather than alphabetically by DESCRIPTION; keep the column if that
   ordering is needed in later steps. */
proc sql;
    create table tmp_merged as
        select lkp.description,
               stu.name,
               stu.age,
               lkp.num,
               stu.COL1
        from tmp_transposed as stu
             left join
             names as lkp
             on stu._NAME_ = lkp.num
        order by stu.name,
                 lkp.num;
quit;

/* Transpose back to one row per student; this time the new columns are
   named after the DESCRIPTION values instead of content1-content5. */
proc transpose data=tmp_merged(drop=num)
               out=tmp_renamed(drop=_name_);
    by name age;
    /* COL1 is the only numeric variable left outside the BY list */
    var COL1;
    /* the ID variable supplies the names of the transposed variables */
    id description;
run;

/* answer (1) */
/* Builds allcontents: a comma-separated list of the descriptions whose
   flag equals 1 for the student. */
data ans1;
    set tmp_renamed;
    /* positional range: relies on math being the first and history the
       last of the five description columns in tmp_renamed */
    array content[5] math--history;
    length allcontents $100;
    do i = 1 to dim(content);
        /* CATX trims its arguments and inserts the delimiter only between
           non-blank pieces, so there is no leading comma to strip.
           (Plain CAT keeps the trailing blanks of the 100-byte
           allcontents, which is why it did not work here.) */
        if content[i] = 1 then
            allcontents = catx(',', allcontents, vname(content[i]));
    end;
    drop i;   /* loop counter is not part of the result */
run;

/* answer (2) */
/* One row per (description, student): keep only the flagged pairs of the
   long table and expose just description, name and age. */
data ans2(keep=description name age);
    set tmp_merged(where=(col1 = 1));
run;

/* cleanup: remove the intermediate tmp_ tables from WORK */
proc datasets library=work nolist;
    delete tmp_: ;
    run;
quit;