2015-01-15 29 views
10

我有一张表格的图像(见下文),我想处理这张图像以从中获取数据。我希望得到类似下面这种形式的数据(对应表格图像的第一行):

rows[0] = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ] 

我需要X的数量以及空格数。 还会有其他表格图像与此类似(所有图像具有x和相同数量的列)。

enter image description here

到目前为止,我已经能用一张 x 的模板图像检测出图中所有的 x,也能在一定程度上检测出线条。我使用的是 Python 的 OpenCV(cv2)。我还使用霍夫变换(Hough transform)来检测水平线和垂直线(效果非常好)。

我想弄清楚如何逐行提取这些信息并把它们存储到列表中。

以下是训练(模板)图像: 用于检测 x(代码中的 train1.png) enter image description here

用于检测线(代码中的 train2.png) enter image description here

用于检测线(代码中的 train3.png) enter image description here

这是代码我迄今为止:

# process images 
from pytesser import * 
from PIL import Image 
from matplotlib import pyplot as plt 
import pytesseract 
import numpy as np 
import cv2 
import math 
import os 

# Table scans to process; the functions below read them from the imgs/ folder.
images = [
    'table1.png',
    'table2.png',
    'table3.png',
    'table4.png',
    'table5.png',
]

# Template crops for cv2.matchTemplate: the first is passed to match_exes,
# the second and third to match_horizontal_lines.
templates = [
    'train1.png',
    'train2.png',
    'train3.png',
]

def hough_transform(im):
    """Detect straight lines in a table image and save an annotated copy.

    Reads ``imgs/<im>``, runs Canny edge detection followed by the standard
    Hough transform, draws every detected line in red (2 px wide) on the
    colour image and writes it to ``imgs/<stem>-lines.png``.

    :param im: image file name, relative to the ``imgs/`` directory.
    :returns: None; the only output is the annotated image on disk.
    """
    img = cv2.imread('imgs/' + im)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    # HoughLines yields None when no line reaches the accumulator threshold,
    # and its array layout differs between OpenCV releases: (1, N, 2) in 2.x
    # versus (N, 1, 2) in 3.x and later.  Flattening to (N, 2) works for both,
    # whereas indexing ``lines[0]`` only sees a single line on 3.x+.
    polar_lines = [] if lines is None else np.reshape(lines, (-1, 2))

    for rho, theta in polar_lines:
        a = np.cos(theta)
        b = np.sin(theta)
        # (x0, y0) is the point on the line closest to the origin; walk 1000 px
        # in both directions along the line to get drawable end points.
        x0 = a * rho
        y0 = b * rho
        x1 = int(x0 + 1000 * (-b))
        y1 = int(y0 + 1000 * (a))
        x2 = int(x0 - 1000 * (-b))
        y2 = int(y0 - 1000 * (a))

        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 2)

    fn = os.path.splitext(im)[0] + '-lines'
    cv2.imwrite('imgs/' + fn + '.png', img)


def match_exes(im, te):
    """Locate every occurrence of the template ``te`` inside image ``im``.

    Both files are read from ``imgs/``.  Each location whose normalised
    correlation score is at least 0.71 is outlined in red, and the annotated
    image is written to ``imgs/<stem>-exes.png``.

    :param im: file name of the table image.
    :param te: file name of the template image (loaded as grayscale).
    :returns: ``(pts, exes, blanks)`` where ``pts`` is the list of ``(x, y)``
        top-left corners of the matches; ``exes`` and ``blanks`` are always
        empty lists in this implementation.
    """
    canvas = cv2.imread('imgs/' + im)
    canvas_gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    template = cv2.imread('imgs/' + te, 0)
    # shape is (rows, cols); reversed it reads (width, height)
    tpl_w, tpl_h = template.shape[::-1]

    scores = cv2.matchTemplate(canvas_gray, template, cv2.TM_CCOEFF_NORMED)
    hits = np.where(scores >= 0.71)

    # np.where gives (row_idx, col_idx); reverse so each point reads (x, y)
    pts = [corner for corner in zip(*hits[::-1])]
    for corner in pts:
        opposite = (corner[0] + tpl_w, corner[1] + tpl_h)
        cv2.rectangle(canvas, corner, opposite, (0, 0, 255), 1)

    out_name = os.path.splitext(im)[0] + '-exes'
    cv2.imwrite('imgs/' + out_name + '.png', canvas)

    return pts, [], []


def match_horizontal_lines(im, te, te2):
    """Locate horizontal-line segments in a table image via two templates.

    The image and both templates are read from ``imgs/``.  Every position
    where either template scores at least 0.8 (normalised correlation) is
    outlined in red and the annotated image is written to
    ``imgs/<stem>-horiz.png``.

    :param im: file name of the table image.
    :param te: first line template (the downward facing line).
    :param te2: second line template (the upward facing line).
    :returns: ``(pts, exes, blanks)`` where ``pts`` holds the ``(x, y)``
        top-left corners of all matches (first template's matches, then the
        second's); ``exes`` and ``blanks`` are always empty lists.
    """
    img_rgb = cv2.imread('imgs/' + im)
    img_gry = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    threshold = 0.8

    pts = []
    exes = []
    blanks = []

    for template_name in (te, te2):
        template = cv2.imread('imgs/' + template_name, 0)
        # shape is (rows, cols); reversed it reads (width, height)
        w, h = template.shape[::-1]

        res = cv2.matchTemplate(img_gry, template, cv2.TM_CCOEFF_NORMED)
        loc = np.where(res >= threshold)

        # np.where gives (row_idx, col_idx); reverse so each point is (x, y)
        for pt in zip(*loc[::-1]):
            pts.append(pt)
            # Bottom-right corner is (x + w, y + h).  The second-template loop
            # previously used pt[0] + h for the y coordinate, which drew the
            # box at the wrong height.
            cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 0, 255), 1)

    fn = os.path.splitext(im)[0] + '-horiz'
    cv2.imwrite('imgs/' + fn + '.png', img_rgb)

    return pts, exes, blanks


# process: annotate every table image and collect one summary line per image
report_lines = []
for img in images:
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('processing %s' % img)
    hough_transform(img)
    pts, exes, blanks = match_exes(img, templates[0])
    pts1, exes1, blanks1 = match_horizontal_lines(img, templates[1], templates[2])
    report_lines.append(
        '%s: %s x\'s & %s horizontal lines\n' % (img, len(pts), len(pts1)))
text = ''.join(report_lines)

# statistics file; the with-block closes the handle even if the write fails
with open('counts.txt', 'w') as outputFile:
    outputFile.write(text)

输出图像如下(可以看到,所有的 x 都被检测到了,但并不是所有的线都被检测到): x 的检测结果 enter image description here

水平线 enter image description here

Hough变换 enter image description here

正如我所说,我其实只是试图从表中获取数据,类似于这种形式(表格图像的第一行):

row a = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ] 

我需要x的数量以及空格的数量。 还会有其他表格图像与此类似(所有图像具有x和相同数量的列以及不同数量的行)。

此外,我正在使用python 2.7

+1

你似乎已经非常非常接近了。根据你的 Hough 线,你应该能够推算出例如第一个单元格(第0行,第0列)的边界。然后只需在这些边界内检查是否有 'x',并相应地更新表格。可惜我的 Python 水平比较弱,否则我会发布更具体的答案。 – beaker 2015-01-15 22:44:36

+0

我在 Hough 变换中注意到的问题是,它为表格中的每一条线都画出了 2 条线。我把绘制的线宽从 2 改成了 1 以观察差异。现在,我正尝试用模板匹配定位所有的 x,并判断哪些 x 位于同一行,等等。 – user 2015-01-16 19:43:00

+0

双线可能是因为“前景”是黑色的,而“背景”是白色的。先尝试颠倒颜色。 – beaker 2015-01-16 19:50:00

回答

2

好吧,我已经解决了。我采用了 @beaker 的建议,即在网格线之间的区域内进行检查。

在此之前,我必须先去除 Hough 变换结果中重复的线。然后,我把剩下的线分到两个列表中:垂直线和水平线。接着,我先遍历水平线、再遍历垂直线,为每个单元格截取一个感兴趣区域(ROI)图像。每个 ROI 图像对应表格主图像中的一个“单元格”。我检查了每个单元格中的轮廓,发现当单元格中有 'x' 时,len(contours) >= 2。因此,凡是 len(contours) < 2 的都是空白单元格(我写了几个测试程序才弄清楚这一点)。下面是我最终跑通的代码:

import cv2 
import numpy as np 
import os 

# Table scans to process; hough_transform_p reads them from the imgs/ folder.
images = [
    'table1.png',
    'table2.png',
    'table3.png',
    'table4.png',
    'table5.png',
]

# Template image(s) for template matching; the first entry is handed to
# hough_transform_p as its ``template`` argument.
templates = [
    'train1.png',
]

def remove_duplicates(lines, min_gap=10):
    """Drop near-duplicate axis-aligned lines, keeping the first of each group.

    A horizontal line (``y1 == y2``) is a duplicate when an already kept
    horizontal line lies less than ``min_gap`` pixels away in ``y``; a
    vertical line (``x1 == x2``) likewise in ``x``.  Lines at exactly the
    same position count as duplicates too.  Lines that are neither
    horizontal nor vertical are never compared and are always kept.

    The previous implementation deleted entries from ``lines`` while two
    nested loops were iterating over it, so elements were skipped and close
    pairs could survive; this version makes a single pass over the input.

    :param lines: mutable list of ``[x1, y1, x2, y2]`` segments.
    :param min_gap: minimum distance in pixels between two kept lines of the
        same orientation (default 10).
    :returns: the same ``lines`` list object, filtered in place with the
        original order of the surviving lines preserved.
    """
    kept = []
    # Positions of the lines kept so far, per orientation.
    kept_positions = {'h': [], 'v': []}

    for line in lines:
        x1, y1, x2, y2 = line
        if y1 == y2:
            axis, position = 'h', y1
        elif x1 == x2:
            axis, position = 'v', x1
        else:
            # Slanted line: nothing to compare against, keep it.
            kept.append(line)
            continue

        if any(abs(position - seen) < min_gap for seen in kept_positions[axis]):
            continue
        kept_positions[axis].append(position)
        kept.append(line)

    # Filter in place so callers holding a reference see the result, as before.
    lines[:] = kept
    return lines


def sort_line_list(lines):
    """Split segments into sorted horizontal and vertical lists.

    A segment ``[x1, y1, x2, y2]`` is vertical when ``x1 == x2``; otherwise
    it is horizontal when ``y1 == y2``.  Anything else is dropped.

    :param lines: iterable of ``[x1, y1, x2, y2]`` segments.
    :returns: ``(horizontal, vertical)`` -- horizontal lines ordered by their
        ``y`` coordinate (stable for ties), vertical lines in plain
        ascending order of the segment values.
    """
    vertical = sorted(seg for seg in lines if seg[0] == seg[2])
    horizontal = sorted(
        (seg for seg in lines if seg[0] != seg[2] and seg[1] == seg[3]),
        key=lambda seg: seg[1])
    return horizontal, vertical


def hough_transform_p(image, template, tableCnt):
    """Read a table image and classify every grid cell as marked or blank.

    Steps: detect line segments with the probabilistic Hough transform,
    drop near-duplicates, split them into horizontal and vertical grid
    lines, then crop the region between each pair of adjacent horizontal
    and adjacent vertical lines.  A cell whose thresholded crop has more
    than one contour is recorded as ``'x'``, otherwise as ``'_'``.

    Side effects: every cell crop is saved as
    ``imgs/table<tableCnt+1>/roi_r<row>-c<col>.png`` and the image with the
    detected lines drawn in red is saved as ``imgs/<stem>-hough_p.png``.

    :param image: file name of the table image, relative to ``imgs/``.
    :param template: unused by this function; kept for call compatibility.
    :param tableCnt: zero-based table index, used to name the crop folder.
    :returns: list of rows, each a list of ``'x'`` / ``'_'`` strings, one
        entry per cell.  (Earlier versions built this list but returned
        None.)
    """
    # open and process images
    img = cv2.imread('imgs/' + image)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # probabilistic hough transform
    raw_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 200,
                                minLineLength=20, maxLineGap=999)
    # None means no segment was found.  The array is (1, N, 4) on OpenCV 2.x
    # and (N, 1, 4) on 3.x+; reshaping to (N, 4) handles both, whereas
    # indexing ``[0]`` keeps only the first segment on 3.x+.
    lines = [] if raw_lines is None else np.reshape(raw_lines, (-1, 4)).tolist()

    # remove duplicates
    lines = remove_duplicates(lines)

    # draw the surviving lines; NOTE: the cell crops below are taken from
    # this same annotated image, so they include the red lines
    for x1, y1, x2, y2 in lines:
        cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 1)

    # sort lines into vertical & horizontal lists
    horizontal, vertical = sort_line_list(lines)

    out_dir = 'imgs/table%s' % (tableCnt + 1)

    # A cell lies between two adjacent horizontal lines (top/bottom) and two
    # adjacent vertical lines (left/right).  Pairing neighbours directly
    # avoids building, saving and then discarding a phantom cell past the
    # last vertical line.
    rows = []
    for i, (upper, lower) in enumerate(zip(horizontal, horizontal[1:])):
        top, bottom = upper[1], lower[1]
        row = []
        for j, (near, far) in enumerate(zip(vertical, vertical[1:])):
            left, right = near[0], far[0]

            # get roi (region of interest) to find an x
            roi = img[top:bottom, left:right]

            # save image (for testing)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            fn = '%s/roi_r%s-c%s.png' % (out_dir, i, j)
            cv2.imwrite(fn, roi)

            # if roi contains an x, add x to array, else add _
            roi_gry = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            ret, thresh = cv2.threshold(roi_gry, 127, 255, 0)
            # findContours returns 2 values on OpenCV 2.x/4.x and 3 values on
            # 3.x; the contour list is always the second-to-last item.
            contours = cv2.findContours(
                thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

            # two or more contours means the cell holds an x
            row.append('x' if len(contours) > 1 else '_')
        rows.append(row)

    # save image (for testing)
    fn = os.path.splitext(image)[0] + '-hough_p.png'
    cv2.imwrite('imgs/' + fn, img)

    return rows


def process():
    """Run hough_transform_p on every file in ``images``.

    Each call receives the file name, the first entry of ``templates`` and
    the zero-based position of the file in ``images``.
    """
    for table_index, file_name in enumerate(images):
        hough_transform_p(file_name, templates[0], table_index)


# Only start processing when executed as a script, not when imported.
if __name__ == '__main__':
    process()

因此,样本图像: enter image description here

而且,输出(代码生成文本文件,为简便起见已被删除): enter image description here

正如您所看到的,文本文件中 x 的数量和位置都与图像中的一致。现在最困难的部分已经完成,我可以继续完成我的任务了!