我试图使用 Apache Beam(Dataflow)连接到安装在云实例中的 Hive 实例。运行时,我得到以下异常。这种情况发生在我使用 Apache Beam 访问这个数据库的时候。我见过很多类似的问题,但它们都与 Apache Beam 或 Google Dataflow 无关。Apache Beam - org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported)
(c9ec8fdbe9d1719a): java.lang.RuntimeException: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:289)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:261)
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:55)
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:43)
at com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:78)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.create(MapTaskExecutorFactory.java:152)
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.doWork(DataflowWorker.java:272)
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:244)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:125)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:105)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:92)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.beam.sdk.util.UserCodeException: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported)
at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeException.java:36)
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn$auxiliary$8CR0LcYI.invokeSetup(Unknown Source)
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:65)
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:47)
at com.google.cloud.dataflow.worker.runners.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:100)
at com.google.cloud.dataflow.worker.runners.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:70)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.createParDoOperation(MapTaskExecutorFactory.java:365)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:278)
... 14 more
Caused by: java.sql.SQLException: Cannot create PoolableConnectionFactory (Method not supported)
at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:2294)
at org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:2039)
at org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:1533)
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn.setup(JdbcIO.java:377)
Caused by: java.sql.SQLException: Method not supported
at org.apache.hive.jdbc.HiveConnection.isValid(HiveConnection.java:898)
at org.apache.commons.dbcp2.DelegatingConnection.isValid(DelegatingConnection.java:918)
at org.apache.commons.dbcp2.PoolableConnection.validate(PoolableConnection.java:283)
at org.apache.commons.dbcp2.PoolableConnectionFactory.validateConnection(PoolableConnectionFactory.java:357)
at org.apache.commons.dbcp2.BasicDataSource.validateConnectionFactory(BasicDataSource.java:2307)
at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:2290)
at org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:2039)
at org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:1533)
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn.setup(JdbcIO.java:377)
at org.apache.beam.sdk.io.jdbc.JdbcIO$Read$ReadFn$auxiliary$8CR0LcYI.invokeSetup(Unknown Source)
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.deserializeCopy(DoFnInstanceManagers.java:65)
at com.google.cloud.dataflow.worker.runners.worker.DoFnInstanceManagers$ConcurrentQueueInstanceManager.peek(DoFnInstanceManagers.java:47)
at com.google.cloud.dataflow.worker.runners.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:100)
at com.google.cloud.dataflow.worker.runners.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:70)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.createParDoOperation(MapTaskExecutorFactory.java:365)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:278)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:261)
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:55)
at com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:43)
at com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:78)
at com.google.cloud.dataflow.worker.runners.worker.MapTaskExecutorFactory.create(MapTaskExecutorFactory.java:152)
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.doWork(DataflowWorker.java:272)
at com.google.cloud.dataflow.worker.runners.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:244)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.doWork(DataflowBatchWorkerHarness.java:125)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:105)
at com.google.cloud.dataflow.worker.runners.worker.DataflowBatchWorkerHarness$WorkerThread.call(DataflowBatchWorkerHarness.java:92)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
使用相同的连接字符串和驱动程序文件,我可以使用普通的java-jdbc程序连接到此实例。
这个问题已经困扰我一段时间了,我一直无法找到解决方案。有人能提供一些思路吗?
请参阅下面连接 Hive 的代码片段:
// Reads rows of dbtest.customer through the Hive JDBC driver and maps each row to a Customer.
// NOTE(review): the stack trace in this post shows HiveConnection.isValid throwing
// "Method not supported" while DBCP validates the connection; presumably this depends on
// the hive-jdbc driver version in use — confirm.
PCollection<Customer> collection = dataflowPipeline.apply(JdbcIO.<Customer>read()
    .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration
        .create("org.apache.hive.jdbc.HiveDriver", "jdbc:hive2://<external IP of computer instance>:10000/dbtest")
        .withUsername("username").withPassword("password"))
    // The select list must not end with a comma: ",from" is a parse error.
    .withQuery(
        "select c_customer_id,c_first_name,c_last_name,c_preferred_cust_flag,c_birth_day from dbtest.customer")
    .withRowMapper(new JdbcIO.RowMapper<Customer>() {
        /**
         * Builds one Customer from the current row of the result set.
         *
         * @param resultSet result set positioned on the row to convert
         * @return a Customer populated from the five selected columns
         * @throws Exception if a column cannot be read
         */
        @Override
        public Customer mapRow(ResultSet resultSet) throws Exception {
            Customer customer = new Customer();
            customer.setC_customer_id(resultSet.getString("c_customer_id"));
            customer.setC_first_name(resultSet.getString("c_first_name"));
            customer.setC_last_name(resultSet.getString("c_last_name"));
            customer.setC_preferred_cust_flag(resultSet.getString("c_preferred_cust_flag"));
            customer.setC_birth_day(resultSet.getInt("c_birth_day"));
            return customer;
        }
    }).withCoder(AvroCoder.of(Customer.class)));
hi @jkff。我忘了在这里更新这个问题。看到这个问题后,我做了一些分析,并尝试使用 2.1.1 版本的 hive-jdbc。现在,我遇到了以下问题 - util.UserCodeException: java.sql.SQLException: 无法创建 PoolableConnectionFactory(无法通过 JDBC Uri 打开客户端传输:jdbc:hive2://:3306/db/java.net.ConnectException:连接超时) at dataflow.worker.runners.worker.MapTaskExecutorFactory$3.typedApply(MapTaskExecutorFactory.java:289)。看起来它没有获得连接。 –
Balu
这看起来像是普通的网络问题,而不是 Dataflow 的问题。您能否在普通的 Java 程序(而不是 Dataflow 管道)中使用相同的参数连接到同一个数据库? – jkff
是的。我可以使用普通的 Java 程序成功地取出数据。我想知道,从数据库读取时,Dataflow 会创建多少个线程。由于 Hive 最初配置的是 Derby,因此一开始没有启用连接池(connection pooling)。后来我们改成了 MySQL。但是,安装了 Cloudera 的低配置虚拟机非常慢,并且由于资源不足而无法接受共享。你认为我的假设正确吗? – Balu