如何给行赋值:如何为 DataFrame 的行添加(更新)值?
- 我在数据帧(DataFrame)中创建了一个新列,并将其初始值设为 0。
- 我编写了逻辑来更新该列的值,但更新没有反映到数据帧中。
输入:
>>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
>>> parafix_df
line_width para_num bbox
0 238.546 NaN (50.0, 579.3, 288.546, 598.022)
1 318 1 (64.0, 564.9, 382.0, 583.622)
2 332 2 (50.0, 550.5, 382.0, 569.222)
3 332 2 (50.0, 536.1, 382.0, 554.822)
4 328.977 2 (50.0, 521.7, 378.977, 540.422)
5 318 3 (64.0, 507.3, 382.0, 526.022)
6 332 3 (50.0, 492.9, 382.0, 511.622)
7 332 3 (50.0, 478.5, 382.0, 497.222)
8 332 3 (50.0, 464.1, 382.0, 482.822)
9 332 3 (50.0, 449.7, 382.0, 468.422)
10 59.04 3 (50.0, 435.3, 109.04, 454.022)
11 304.007 4 (64.0, 420.9, 368.007, 439.622)
12 318 5 (64.0, 406.5, 382.0, 425.222)
13 332 5 (50.0, 392.1, 382.0, 410.822)
14 332 5 (50.0, 377.7, 382.0, 396.422)
15 332 5 (50.0, 363.3, 382.0, 382.022)
16 43.252 5 (50.0, 348.9, 93.252, 367.622)
17 318 6 (64.0, 334.5, 382.0, 353.222)
18 332 6 (50.0, 320.1, 382.0, 338.822)
19 332 6 (50.0, 305.7, 382.0, 324.422)
20 332 6 (50.0, 291.3, 382.0, 310.022)
21 332 6 (50.0, 276.9, 382.0, 295.622)
22 317.02 6 (50.0, 262.5, 367.02, 281.222)
23 318 7 (64.0, 248.1, 382.0, 266.822)
24 332 7 (50.0, 233.7, 382.0, 252.422)
25 47.014 7 (50.0, 219.3, 97.014, 238.022)
26 318 8 (64.0, 204.9, 382.0, 223.622)
27 316.723 8 (50.0, 190.5, 366.723, 209.222)
28 318 9 (64.0, 176.1, 382.0, 194.822)
29 326.766 9 (50.0, 161.7, 376.766, 180.422)
30 318 10 (64.0, 147.3, 382.0, 166.022)
31 332 10 (50.0, 132.9, 382.0, 151.622)
32 332 10 (50.0, 118.5, 382.0, 137.222)
33 305.393 11 (64.0, 104.1, 369.393, 122.822)
34 318 12 (64.0, 89.7, 382.0, 108.422)
35 318 13 (64.0, 75.3, 382.0, 94.022)
36 319.165 13 (50.0, 60.9, 369.165, 79.622)
37 308.165 14 (64.0, 46.5, 372.165, 65.222)
38 318 15 (64.0, 32.1, 382.0, 50.822)
39 329.153 15 (50.0, 17.7, 379.153, 36.422)
40 318 16 (64.0, 3.3, 382.0, 22.022)
41 324.335 16 (50.0, -11.1, 374.335, 7.622)
当前输出(new_para_num 列始终为 0):
bbox new_para_num
0 (50.0, 579.3, 288.546, 598.022) 0
1 (64.0, 564.9, 382.0, 583.622) 0
2 (50.0, 550.5, 382.0, 569.222) 0
3 (50.0, 536.1, 382.0, 554.822) 0
4 (50.0, 521.7, 378.977, 540.422) 0
5 (64.0, 507.3, 382.0, 526.022) 0
6 (50.0, 492.9, 382.0, 511.622) 0
7 (50.0, 478.5, 382.0, 497.222) 0
8 (50.0, 464.1, 382.0, 482.822) 0
9 (50.0, 449.7, 382.0, 468.422) 0
10 (50.0, 435.3, 109.04, 454.022) 0
11 (64.0, 420.9, 368.007, 439.622) 0
12 (64.0, 406.5, 382.0, 425.222) 0
13 (50.0, 392.1, 382.0, 410.822) 0
14 (50.0, 377.7, 382.0, 396.422) 0
15 (50.0, 363.3, 382.0, 382.022) 0
16 (50.0, 348.9, 93.252, 367.622) 0
17 (64.0, 334.5, 382.0, 353.222) 0
18 (50.0, 320.1, 382.0, 338.822) 0
19 (50.0, 305.7, 382.0, 324.422) 0
20 (50.0, 291.3, 382.0, 310.022) 0
21 (50.0, 276.9, 382.0, 295.622) 0
22 (50.0, 262.5, 367.02, 281.222) 0
23 (64.0, 248.1, 382.0, 266.822) 0
24 (50.0, 233.7, 382.0, 252.422) 0
25 (50.0, 219.3, 97.014, 238.022) 0
26 (64.0, 204.9, 382.0, 223.622) 0
27 (50.0, 190.5, 366.723, 209.222) 0
28 (64.0, 176.1, 382.0, 194.822) 0
29 (50.0, 161.7, 376.766, 180.422) 0
30 (64.0, 147.3, 382.0, 166.022) 0
31 (50.0, 132.9, 382.0, 151.622) 0
32 (50.0, 118.5, 382.0, 137.222) 0
33 (64.0, 104.1, 369.393, 122.822) 0
34 (64.0, 89.7, 382.0, 108.422) 0
35 (64.0, 75.3, 382.0, 94.022) 0
36 (50.0, 60.9, 369.165, 79.622) 0
37 (64.0, 46.5, 372.165, 65.222) 0
38 (64.0, 32.1, 382.0, 50.822) 0
39 (50.0, 17.7, 379.153, 36.422) 0
40 (64.0, 3.3, 382.0, 22.022) 0
41 (50.0, -11.1, 374.335, 7.622) 0
产生上述结果的代码:
# First attempt at renumbering paragraphs: walk the rows and bump a counter
# whenever a row's bbox[0] is greater than bbox[0] of the widest line, then
# store the counter in the row's "new_para_num" field.
# NOTE: the indentation of this listing was lost when it was pasted, so the
# exact nesting of the statements below cannot be recovered with certainty.
parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
# New column, initialised to 0 for every row.
parafix_df["new_para_num"] = 0
max_width = parafix_df['line_width'].max()
# bbox of the first row whose line_width equals the maximum.
# NOTE(review): `selected` is not defined anywhere in this snippet --
# presumably `parafix_df` was intended; confirm.
bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
previous = None
para1 = 1
# Pairs every row with the row that follows it. Each item produced by
# iterrows() is an (index, row) tuple, so current[1] / next[1] are the row
# objects themselves, not "row number 1".
# NOTE(review): assuming this is itertools.izip, iteration stops with the
# shorter iterator, so the final row never appears as `current`.
# `next` also shadows the built-in of the same name.
for current, next in izip(parafix_df.iterrows(), parafix_df.iloc[1:].iterrows()):
if previous==None:
# NOTE(review): this writes to the row object handed out by iterrows().
# The pandas documentation warns that such rows may be copies, so the write
# probably never reaches parafix_df -- consistent with the all-zero
# new_para_num column in the printed frame. Confirm against the pandas docs.
current[1]["new_para_num"] = para1
else:
bbox_current = current[1]["bbox"]
# bbox_next and bbox_previous are assigned but never read afterwards.
bbox_next = next[1]["bbox"]
bbox_previous = previous[1]["bbox"]
# A row whose bbox[0] lies to the right of the widest line's bbox[0] is
# treated as the start of a new paragraph.
if bbox_current[0]>bbox_max_width[0]:
para1 += 1
# Python 2 print statement.
print "para1:", para1
# Same caveat as above: the target is the iterrows() row object, not parafix_df.
current[1]["new_para_num"] = para1
previous = current
输出:
但我期望得到如下新的段落编号值:
para1: 2
para1: 3
para1: 4
para1: 5
para1: 6
para1: 7
para1: 8
para1: 9
para1: 10
para1: 11
para1: 12
para1: 13
para1: 14
para1: 15
para1: 16
你能帮助我吗?
以下是我最终可以正常运行的代码:
# Assign a running paragraph number ("new_para_num") to every line.
#
# A line starts a new paragraph when the first element of its bbox is greater
# than the first element of the bbox of the widest line. The first row always
# belongs to paragraph 1.

# Work on an explicit copy so that adding a column does not write into a
# slice of main_df (avoids pandas' chained-assignment problems).
parafix_df = main_df[["line_text", "line_width", "para_num", "bbox"]].copy()

max_width = parafix_df["line_width"].max()
# bbox of the first row that has the maximum width. The mask is built from
# parafix_df itself; the earlier version used `selected`, a name that is not
# defined in this snippet.
bbox_max_width = parafix_df.loc[parafix_df["line_width"] == max_width].iloc[0]["bbox"]

# True for every row whose bbox[0] exceeds the reference bbox[0].
starts_new_para = parafix_df["bbox"].map(lambda box: box[0]) > bbox_max_width[0]
# The loop version never incremented the counter on the first row.
starts_new_para.iloc[0] = False

# Running count of paragraph starts, offset so numbering begins at 1.
# Assigning by column name removes the hard-coded column position (4).
parafix_df["new_para_num"] = starts_new_para.cumsum() + 1

# Final paragraph count, kept under the name the loop version left behind.
para1 = int(parafix_df["new_para_num"].iloc[-1])
这段代码还能进一步优化吗?
在你的代码中,你始终只访问第二行(索引:'1')——`current[1]`、`next[1]`。不过我认为你不需要遍历 DataFrame 就能实现目标——应该可以用 pandas 自身的方式做到这一点。– MaxU
是的,我是 pandas 新手。我已经更新了代码,你能再帮我看一下吗? –