2016-08-25 79 views
1

我尝试使用 Python 的 pandas 和 sqlalchemy,把 CSV 文件中的数百万条记录插入到 MySQL 数据库。有时插入会在完成之前中断,有时甚至连一行都没有插入到数据库。(标题:Python Pandas with sqlalchemy | 批量插入错误)

我的代码是:

import pandas as pd 
from sqlalchemy import create_engine 

# Load the whole CSV file into a DataFrame.
df = pd.read_csv('/home/shankar/LAB/Python/Rough/*******.csv') 
# Replace every null cell with None: the 2nd argument of where() is the value
# used wherever the condition (here "cell is not null") is False.
df = df.where(pd.notnull(df), None) 
# Preview of the first rows; presumably only useful in an interactive session.
df.head() 
# SQLAlchemy URL selecting MySQL through the PyMySQL driver.
conn_str = "mysql+pymysql://root:[email protected]/MY_DB?charset=utf8&use_unicode=0" 
engine = create_engine(conn_str) 
# NOTE(review): raw_connection() hands pandas a plain DBAPI connection instead
# of the SQLAlchemy engine. The traceback quoted after this snippet shows
# to_sql() then issuing "SELECT name FROM sqlite_master ... name=?" through
# PyMySQL, which fails with "not all arguments converted during string
# formatting". Passing con=engine is presumably the intended usage -- confirm
# against the pandas to_sql documentation.
conn = engine.raw_connection() 
# Write the DataFrame to the target table, appending to existing rows.
df.to_sql(name='table_name', con=conn, 
     if_exists='append') 
conn.close() 

错误:

--------------------------------------------------------------------------- 
TypeError         Traceback (most recent call last) 
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1563    else: 
-> 1564     cur.execute(*args) 
1565    return cur 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args) 
164 
--> 165   query = self.mogrify(query, args) 
166 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args) 
143   if args is not None: 
--> 144    query = query % self._escape_args(args, conn) 
145 

TypeError: not all arguments converted during string formatting 

During handling of the above exception, another exception occurred: 

DatabaseError        Traceback (most recent call last) 
<ipython-input-6-bb91db9eb97e> in <module>() 
11 df.to_sql(name='company', con=conn, 
12   if_exists='append', 
---> 13   chunksize=10000) 
14 conn.close() 

/home/shankar/.local/lib/python3.5/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype) 
1163   sql.to_sql(self, name, con, flavor=flavor, schema=schema, 
1164     if_exists=if_exists, index=index, index_label=index_label, 
-> 1165     chunksize=chunksize, dtype=dtype) 
1166 
1167  def to_pickle(self, path): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype) 
569  pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index, 
570      index_label=index_label, schema=schema, 
--> 571      chunksize=chunksize, dtype=dtype) 
572 
573 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype) 
1659        if_exists=if_exists, index_label=index_label, 
1660        dtype=dtype) 
-> 1661   table.create() 
1662   table.insert(chunksize) 
1663 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in create(self) 
688 
689  def create(self): 
--> 690   if self.exists(): 
691    if self.if_exists == 'fail': 
692     raise ValueError("Table '%s' already exists." % self.name) 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in exists(self) 
676 
677  def exists(self): 
--> 678   return self.pd_sql.has_table(self.name, self.schema) 
679 
680  def sql_schema(self): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in has_table(self, name, schema) 
1674   query = flavor_map.get(self.flavor) 
1675 
-> 1676   return len(self.execute(query, [name, ]).fetchall()) > 0 
1677 
1678  def get_table(self, table_name, schema=None): 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1574    ex = DatabaseError(
1575     "Execution failed on sql '%s': %s" % (args[0], exc)) 
-> 1576    raise_with_traceback(ex) 
1577 
1578  @staticmethod 

/home/shankar/.local/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback) 
331   if traceback == Ellipsis: 
332    _, _, traceback = sys.exc_info() 
--> 333   raise exc.with_traceback(traceback) 
334 else: 
335  # this version of raise is a syntax error in Python 3 

/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 
1562     cur.execute(*args, **kwargs) 
1563    else: 
-> 1564     cur.execute(*args) 
1565    return cur 
1566   except Exception as exc: 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args) 
163    pass 
164 
--> 165   query = self.mogrify(query, args) 
166 
167   result = self._query(query) 

/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args) 
142 
143   if args is not None: 
--> 144    query = query % self._escape_args(args, conn) 
145 
146   return query 

    DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting 

此错误只在某些 CSV 文件上发生。请帮我看看这个错误!

在此先感谢。

回答

0

从错误信息来看,你传给查询的参数并不全是字符串,所以你需要把数据框中的每个元素都转换为字符串:df = df.astype(str)

+0

我得到的错误是“name 'creaqte_engine' is not defined”: --------------------------------------------------------------------------- NameError Traceback (most recent call last) in <module>() 7 df.head() 8 conn_str = "mysql+pymysql://root:root@localhost/globalTracker?charset=utf8&use_unicode=0" ----> 9 engine = creaqte_engine(conn_str) 10 conn = engine.raw_connection() 11 #df = df.astype(str) NameError: name 'creaqte_engine' is not defined –

+0

只需修复拼写错误:把 'creaqte_engine' 中多余的 'q' 删掉,改为 'create_engine' – van