2012-12-08 30 views
6

我想为一个树木育种项目创建线性的祖先(谱系)列表。亲本都是雌/雄配对,且二者不能有亲缘关系(不允许近亲繁殖),因此跟踪并可视化这些家系非常重要。(标题:PostgreSQL 通过 2 个父/子表进行递归)

下面是使用PostgreSQL 9.1测试表/数据:

-- Start from a clean slate so the script can be re-run; CASCADE also removes
-- objects that depend on the tables (such as the plant -> family foreign key).
DROP TABLE IF EXISTS family CASCADE;
DROP TABLE IF EXISTS plant CASCADE;

-- family: one row per cross, identified by the two parent plants.
-- Note that female_plant_id / male_plant_id carry no foreign key to plant.
CREATE TABLE family (
    id              SERIAL      CONSTRAINT family_pk PRIMARY KEY,
    family_key      VARCHAR(20) UNIQUE,
    female_plant_id INTEGER     NOT NULL DEFAULT 1,
    male_plant_id   INTEGER     NOT NULL DEFAULT 1,
    -- Filial generation, e.g. 0, 1, 2... which would represent None, F1, F2...
    filial_n        INTEGER     NOT NULL DEFAULT -1
);

-- plant: one row per individual; id_family is the family the plant came from.
CREATE TABLE plant (
    id        SERIAL      CONSTRAINT plant_pk PRIMARY KEY,
    plant_key VARCHAR(20) UNIQUE,
    -- Temporary: this foreign key may need to be removed later.
    id_family INTEGER     NOT NULL
        CONSTRAINT plant_id_family_fk REFERENCES family (id)
);

-- FAMILY table data.
-- All rows are loaded with explicit ids in a single statement. Note that
-- id 8 is not used.
INSERT INTO family (id, family_key, female_plant_id, male_plant_id, filial_n)
VALUES
    -- Default place holder record
    (1,  'NA',            1,  1,  1),
    -- Root level Alba families
    (2,  'family1AA',     2,  3,  1),
    (3,  'family2AA',     4,  5,  1),
    (4,  'family3AA',     6,  7,  1),
    -- F2 Hybrid Families
    (5,  'family4AE',     8,  11, 0),
    (6,  'family5AG',     9,  12, 0),
    (7,  'family6AT',     10, 13, 0),
    -- F3 Double Hybrid family
    (9,  'family7AEAG',   14, 15, 0),
    -- F3 Tri-hybrid backcross family
    (10, 'family8AEAGAT', 17, 16, 0);

-- Writing explicit ids into a serial column does not advance its sequence.
-- Move the sequence past the highest id so that later inserts relying on the
-- default id do not fail with a duplicate-key error.
SELECT setval(
    pg_get_serial_sequence('family', 'id'),
    (SELECT max(id) FROM family)
);

-- PLANT table data.
-- All rows are loaded with explicit ids in a single statement; id_family
-- points at the family the plant came from (1 = place holder family).
INSERT INTO plant (id, plant_key, id_family)
VALUES
    -- Default place holder record
    (1,  'NA',            1),
    -- Root level Alba parents
    (2,  'female1A',      1),
    (3,  'male1A',        1),
    (4,  'female2A',      1),
    (5,  'male2A',        1),
    (6,  'female3A',      1),
    (7,  'male3A',        1),
    -- Female Alba progeny
    (8,  'female4A',      2),
    (9,  'female5A',      3),
    (10, 'female6A',      4),
    -- Male Aspen root level parents
    -- (plant 13 is keyed 'female1T' but family 7 references it as male_plant_id)
    (11, 'male1E',        1),
    (12, 'male1G',        1),
    (13, 'female1T',      1),
    -- F1 Hybrid progeny
    (14, 'female1AE',     5),
    (15, 'male1AG',       6),
    (16, 'male1AT',       7),
    -- Hybrid progeny
    (17, 'female1AEAG',   9),
    -- Tri-hybrid backcross progeny
    (18, 'female1AEAGAT', 10),
    (19, 'female2AEAGAT', 10);

-- Writing explicit ids into a serial column does not advance its sequence.
-- Move the sequence past the highest id so that later inserts relying on the
-- default id do not fail with a duplicate-key error.
SELECT setval(
    pg_get_serial_sequence('plant', 'id'),
    (SELECT max(id) FROM plant)
);

下面是我参照 Postgres 文档中“WITH Queries”一节改写出来的递归查询:

-- Asker's attempt, modelled on the cycle-detection example for recursive
-- WITH queries: start at the F1 families and try to follow them downwards,
-- collecting (female, male) parent pairs in "path".
WITH RECURSIVE search_tree (
    family_key,
    female_plant,
    male_plant,
    depth,
    path,
    cycle
) AS (
    -- Anchor: every F1 (root level) family with its two parents resolved.
    SELECT
        fam.family_key,
        female_parent.plant_key,
        male_parent.plant_key,
        1,
        ARRAY[ROW(female_parent.plant_key, male_parent.plant_key)],
        false
    FROM family AS fam
    INNER JOIN plant AS female_parent
        ON female_parent.id = fam.female_plant_id
    INNER JOIN plant AS male_parent
        ON male_parent.id = fam.male_plant_id
    WHERE fam.filial_n = 1  -- Include only F1 families (root level)
      AND fam.id <> 1       -- omit the default first family record

    UNION ALL

    -- Recursive step. It joins back on the SAME family_key and the SAME
    -- parent keys as the previous row while also demanding filial_n <> 1.
    -- family_key is UNIQUE and every anchor row has filial_n = 1, so this
    -- step never finds a row and only the anchor rows are returned.
    SELECT
        fam.family_key,
        female_parent.plant_key,
        male_parent.plant_key,
        prev.depth + 1,
        prev.path || ROW(female_parent.plant_key, male_parent.plant_key),
        ROW(female_parent.plant_key, male_parent.plant_key) = ANY(prev.path)
    FROM search_tree AS prev
    INNER JOIN family AS fam
        ON fam.family_key = prev.family_key
    INNER JOIN plant AS female_parent
        ON female_parent.id = fam.female_plant_id
        AND female_parent.plant_key = prev.female_plant
    INNER JOIN plant AS male_parent
        ON male_parent.id = fam.male_plant_id
        AND male_parent.plant_key = prev.male_plant
    WHERE fam.filial_n <> 1  -- Include only non-F1 families (non-root levels)
      AND NOT prev.cycle
)
SELECT
    family_key,
    female_plant,
    male_plant,
    depth,
    path,
    cycle
FROM search_tree;

下面是所期望的输出:

F1 family1AA=(female1A x male1A) > F2 family4AE=(female4A x male1E) > F3 family7AEAG=(female1AE x male1AG) > F4 family8AEAGAT=(female1AEAG x male1AT) 
F1 family2AA=(female2A x male2A) > F2 family5AG=(female5A x male1G) > F3 family7AEAG=(female1AE x male1AG) > F4 family8AEAGAT=(female1AEAG x male1AT) 
F1 family3AA=(female3A x male3A) > F2 family6AT=(female6A x female1T) > F3 family8AEAGAT=(female1AEAG x male1AT) 

上面的递归查询返回了 3 行,其中的 F1 亲本是正确的,但路径里没有包含下游的家系/亲本。如果有人能帮我让递归查询的输出接近上面列出的期望结果,我将不胜感激。

+0

好问题;非常好,也很完整。我正在研究…… – wildplasser

+0

我不确定自己是否理解了层次结构是如何定义的。我在示例表中找不到父/子关系。你能解释一下如何找到某一行的父代(或子代)吗? –

+0

'plant.id = 11'这行可能有'2'作为'family_id'吗? –

回答

4

我按照自己的理解改写了查询,这不一定就是你真正需要的 :-)

查询从由 f.id != 1 AND f.filial_n = 1 定义的三个给定家系出发,然后递归地扩展到它们所有可得的子代家系。

至于按什么条件才应该只选出最后三条匹配结果,我还不太明白。也许是对每个初始家系取最长的那条链?

-- List every chain of families that descends from a root (F1) family.
-- Output: one row per family reached, with its depth below the root and the
-- list of family keys leading to it.
WITH RECURSIVE expanded_family AS (
    -- One row per family with both parents resolved. pf_* describes the
    -- female parent, pm_* the male parent; *_family is the family that the
    -- parent plant itself came from.
    SELECT
        f.id,
        f.family_key,
        pf.id        AS pf_id,
        pf.plant_key AS pf_key,
        pf.id_family AS pf_family,
        pm.id        AS pm_id,
        pm.plant_key AS pm_key,
        pm.id_family AS pm_family,
        f.filial_n
    FROM family AS f
    INNER JOIN plant AS pf
        ON pf.id = f.female_plant_id
    INNER JOIN plant AS pm
        ON pm.id = f.male_plant_id
),
search_tree AS (
    -- Anchor: the root families (F1), excluding the place holder family 1.
    SELECT
        f.*,
        1 AS depth,
        ARRAY[f.family_key::text] AS path
    FROM expanded_family AS f
    WHERE
        f.id != 1
        AND f.filial_n = 1
    UNION ALL
    -- Step: a family is a child of the current one when either of its
    -- parents was produced by the current family.
    SELECT
        f.*,
        st.depth + 1,
        st.path || f.family_key::text
    FROM search_tree AS st
    INNER JOIN expanded_family AS f
        ON f.pf_family = st.id
        OR f.pm_family = st.id
    WHERE
        f.id <> 1
)
SELECT
    family_key,
    depth,
    path
FROM search_tree
-- Without ORDER BY the row order is unspecified; sort so the output is stable.
ORDER BY
    depth,
    path;

结果是:

family_key | depth |      path      
---------------+-------+------------------------------------------------- 
family1AA  |  1 | {family1AA} 
family2AA  |  1 | {family2AA} 
family3AA  |  1 | {family3AA} 
family4AE  |  2 | {family1AA,family4AE} 
family5AG  |  2 | {family2AA,family5AG} 
family6AT  |  2 | {family3AA,family6AT} 
family7AEAG |  3 | {family1AA,family4AE,family7AEAG} 
family7AEAG |  3 | {family2AA,family5AG,family7AEAG} 
family8AEAGAT |  3 | {family3AA,family6AT,family8AEAGAT} 
family8AEAGAT |  4 | {family1AA,family4AE,family7AEAG,family8AEAGAT} 
family8AEAGAT |  4 | {family2AA,family5AG,family7AEAG,family8AEAGAT} 

技术性的东西:

  • 我去掉了 cycle(环检测)相关的部分,因为对于干净的数据来说它应该是不必要的(个人浅见)

  • 如果遇到奇怪的性能问题,可以把 expanded_family 内联到查询里;不过目前保留它能让递归查询更易读。

EDIT

只需稍作修改,查询就可以只保留这样的行:对每一个“根”家系(即查询出发的那些家系),只显示其最长的路径。

我只给出发生变化的 search_tree 部分,所以查询的开头部分需要从上一节复制过来:

-- ... (the query head is unchanged; copy it from the previous query)
search_tree AS
(
    -- Anchor: the root families (F1), excluding the place holder family 1.
    SELECT
        f.*,
        f.id AS family_root,  -- remember which root the row came from
        1 AS depth,
        ARRAY[f.family_key::text] AS path
    FROM expanded_family AS f
    WHERE
        f.id <> 1
        AND f.filial_n = 1
    UNION ALL
    -- Step: follow either parent down to the families it produced.
    SELECT
        f.*,
        st.family_root,       -- propagate the ancestor
        st.depth + 1,
        st.path || f.family_key::text
    FROM search_tree AS st
    INNER JOIN expanded_family AS f
        ON f.pf_family = st.id
        OR f.pm_family = st.id
    WHERE
        f.id <> 1
)
SELECT
    family_key,
    path
FROM
(
    -- Rank the chains of each root by length, longest first. rank() gives
    -- equally long chains the same rank, so ties are all kept.
    SELECT
        rank() OVER (PARTITION BY family_root ORDER BY depth DESC) AS depth_rank,
        family_root,
        family_key,
        depth,
        path
    FROM search_tree
) AS ranked
WHERE depth_rank = 1
-- Without ORDER BY the row order is unspecified; sort so the output is stable.
ORDER BY
    family_root,
    path;

结果是:

family_key |      path      
---------------+------------------------------------------------- 
family8AEAGAT | {family1AA,family4AE,family7AEAG,family8AEAGAT} 
family8AEAGAT | {family2AA,family5AG,family7AEAG,family8AEAGAT} 
family8AEAGAT | {family3AA,family6AT,family8AEAGAT} 
(3 rows) 

EDIT2

根据评论我添加了pretty_print版本的路径:

-- For every root (F1) family, print its longest chain of descendant families
-- as a single string of the form
--   F1 key=(female x male) -> F2 key=(female x male) -> ...
WITH RECURSIVE expanded_family AS (
    -- One row per family: the families its two parents came from, plus a
    -- ready-made "key=(female x male)" label.
    SELECT
        f.id,
        pf.id_family AS pf_family,
        pm.id_family AS pm_family,
        f.filial_n,
        f.family_key || '=(' || pf.plant_key || ' x ' || pm.plant_key || ')' AS pretty_print
    FROM family AS f
    INNER JOIN plant AS pf
        ON pf.id = f.female_plant_id
    INNER JOIN plant AS pm
        ON pm.id = f.male_plant_id
),
search_tree AS
(
    -- Anchor: the root families (F1), excluding the place holder family 1.
    SELECT
        f.id,
        f.id AS family_root,
        1 AS depth,
        'F1 ' || f.pretty_print AS path
    FROM expanded_family AS f
    WHERE
        f.id <> 1
        AND f.filial_n = 1
    UNION ALL
    -- Step: follow either parent down to the families it produced and append
    -- the next generation label. The depth arithmetic is parenthesised and
    -- cast so it does not rely on operator precedence or implicit conversion.
    SELECT
        f.id,
        st.family_root,
        st.depth + 1,
        st.path || ' -> F' || (st.depth + 1)::text || ' ' || f.pretty_print
    FROM search_tree AS st
    INNER JOIN expanded_family AS f
        ON f.pf_family = st.id
        OR f.pm_family = st.id
    WHERE
        f.id <> 1
)
SELECT
    path
FROM
(
    -- Rank the chains of each root by length, longest first. rank() gives
    -- equally long chains the same rank, so ties are all kept.
    SELECT
        rank() OVER (PARTITION BY family_root ORDER BY depth DESC) AS depth_rank,
        family_root,
        path
    FROM search_tree
) AS ranked
WHERE depth_rank = 1
-- Without ORDER BY the row order is unspecified; sort so the output is stable.
ORDER BY
    family_root,
    path;

结果是

path                   
---------------------------------------------------------------------------------------------------------------------------------------------------------- 
F1 family1AA=(female1A x male1A) -> F2 family4AE=(female4A x male1E) -> F3 family7AEAG=(female1AE x male1AG) -> F4 family8AEAGAT=(female1AEAG x male1AT) 
F1 family2AA=(female2A x male2A) -> F2 family5AG=(female5A x male1G) -> F3 family7AEAG=(female1AE x male1AG) -> F4 family8AEAGAT=(female1AEAG x male1AT) 
F1 family3AA=(female3A x male3A) -> F2 family6AT=(female6A x female1T) -> F3 family8AEAGAT=(female1AEAG x male1AT) 
(3 rows) 
+0

太棒了——剩下的部分我应该可以自己完成了!我可能会用 PL/pgSQL 去掉重复的祖先并加上父/子格式。谢谢你的帮助!你帮忙培育出了更好的树! – user1888167

+0

@user1888167:不需要 PL/pgSQL。你可以在三个地方添加合适的过滤条件:非递归部分的 WHERE(那里已经检查了 f.id 和 f.filial_n)、递归部分的 WHERE,以及“外层”SELECT。“外层”SELECT 是做这类过滤的常用位置。过滤时可以使用比当前输出所显示的更多的信息。
我只是不知道你想按什么标准来过滤。 –

+0

最理想的标准是只显示“完整的家系”,也就是你输出中的最后三行。所以,是的,对每个初始家系来说,要的就是最长的那条祖先链。这可以做到吗? – user1888167