2015-04-06 35 views
0

我试图迁移到postgresql的JSON数据如下。将此JSON数据迁移到postgresql时发生错误

JSON Data: 
    { 
    "wsgi.multiprocess": true, 
    "HTTP_REFERER": "http://localhost:9000/", 
    "SCRIPT_NAME": "", 
    "REQUEST_METHOD": "GET", 
    "PATH_INFO": "/api/impressions/i/", 
    "HTTP_ORIGIN": "http://localhost:9000", 
    "SERVER_PROTOCOL": "HTTP/1.1", 
    "QUERY_STRING": "", 
    "CONTENT_LENGTH": "", 
    "HTTP_USER_AGENT": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.104 Safari/537.36", 
    "HTTP_CONNECTION": "keep-alive", 
    "HTTP_COOKIE": "_ga=GA1.3.1851235816.1425597711; sessionid=ihukujut48uhatb1rqtzaed78jszqsyk; csrftoken=8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj; customer=\"AmsrbY7bSj5wiDQPM7xcRa:1YdLVd:nKyRyZNx5aoLLmVRL4o9aN267vI\"", 
    "SERVER_NAME": "app.adomattic.com", 
    "REMOTE_ADDR": "182.186.59.228", 
    "HTTP_X_FIREPHP_VERSION": "0.0.6", 
    "wsgi.url_scheme": "http", 
    "SERVER_PORT": "80", 
    "uwsgi.node": "stage", 
    "HTTP_PUBLISHER_KEY": "ng2HM6ThZehtWHR2tgonBg", 
    "HTTP_DNT": "1", 
    "HTTP_HOST": "app.adomattic.com", 
    "wsgi.multithread": false, 
    "HTTP_CACHE_CONTROL": "max-age=0", 
    "REQUEST_URI": "/api/impressions/i/", 
    "HTTP_ACCEPT": "application/json, text/plain, */*", 
    "wsgi.run_once": false, 
    "REMOTE_PORT": "50740", 
    "HTTP_ACCEPT_LANGUAGE": "en-US,en;q=0.8,ur;q=0.6", 
    "uwsgi.version": "1.9.17.1-debian", 
    "CONTENT_TYPE": "", 
    "DOCUMENT_ROOT": "/usr/share/nginx/html", 
    "CSRF_COOKIE": "8F2CcluTFgGUdCV3mfgnhqxfh2crgDKj", 
    "HTTP_ACCEPT_ENCODING": "gzip, deflate, sdch" 
    } 

要迁移这些数据,首先我创造我的数据库中的表:

CREATE TABLE filtered_data 
(
ROW_ID INT 
,MULTIPROCESS VARCHAR(10) 
,HTTP_REFERER VARCHAR(100) 
,SCRIPT_NAME VARCHAR(20) 
,REQUEST_METHOD VARCHAR(10) 
,PATH_INFO VARCHAR(40) 
,HTTP_ORIGIN VARCHAR(100) 
,SERVER_PROTOCOL VARCHAR(30) 
,QUERY_STRING VARCHAR(50) 
,CONTENT_LENGTH VARCHAR(20) 
,HTTP_USER_AGENT VARCHAR(400) 
,HTTP_CONNECTION VARCHAR(30) 
,HTTP_COOKIE VARCHAR(500) 
,SERVER_NAME VARCHAR(30) 
,REMOTE_ADDR VARCHAR(30) 
,FIREPHP_VERSION VARCHAR(20) 
,URL_SCHEME VARCHAR(10) 
,SERVER_PORT INT 
,NODE VARCHAR(20) 
,PUBLISHER_KEY VARCHAR(30) 
,HTTP_DNT INT 
,HTTP_HOST VARCHAR(30) 
,MULTITHREAD VARCHAR(10) 
,CACHE_CONTROL VARCHAR(20) 
,REQUEST_URI VARCHAR(30) 
,HTTP_ACCEPT VARCHAR(50) 
,RUN_ONCE VARCHAR(10) 
,REMOTE_PORT INT 
,HTTP_ACCEPT_LANGUAGE VARCHAR(30) 
,UWSGI_VERSION VARCHAR(30) 
,CONTENT_TYPE VARCHAR(20) 
,DOCUMENT_ROOT VARCHAR(40) 
,CSRF_COOKIE VARCHAR(50) 
,HTTP_ACCEPT_ENCODING VARCHAR(50) 
); 

创建此表之后,我用下面的查询复制JSON数据到这个表:

COPY raw_data(DATA) FROM 'metadata.txt' DELIMITERS '#' CSV; 

该语句将整个文件加载到跨越多行的单个列中。

然后,我使用下列插入查询到的JSON数据拆分成各个列:

INSERT INTO filtered_data 
SELECT 
row_id 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multiprocess:',2), ', HTTP_REFERER:',1)) AS MULTIPROCESS 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_REFERER:',2), ', SCRIPT_NAME:',1)) AS HTTP_REFERER 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SCRIPT_NAME:',2), ', REQUEST_METHOD:',1)) AS SCRIPT_NAME 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_METHOD:',2), ', PATH_INFO:',1)) AS REQUEST_METHOD 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'PATH_INFO:',2), ', HTTP_ORIGIN:',1)) AS PATH_INFO 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ORIGIN:',2), ', SERVER_PROTOCOL:',1)) AS HTTP_ORIGIN 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_PROTOCOL:',2), ', QUERY_STRING:',1)) AS SERVER_PROTOCOL 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'QUERY_STRING:',2), ', CONTENT_LENGTH:',1)) AS QUERY_STRING 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_LENGTH:',2), ', HTTP_USER_AGENT:',1)) AS CONTENT_LENGTH 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_USER_AGENT:',2), ', HTTP_CONNECTION:',1)) AS HTTP_USER_AGENT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CONNECTION:',2), ', HTTP_COOKIE:',1)) AS HTTP_CONNECTION 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_COOKIE:',2), ', SERVER_NAME:',1)) AS HTTP_COOKIE 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'SERVER_NAME:',2), ', REMOTE_ADDR:',1)) AS SERVER_NAME 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REMOTE_ADDR:',2), ', HTTP_X_FIREPHP_VERSION:',1)) AS REMOTE_ADDR 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_X_FIREPHP_VERSION:',2), ', wsgi.url_scheme:',1)) AS FIREPHP_VERSION 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.url_scheme:',2), ', SERVER_PORT:',1)) AS URL_SCHEME 
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', SERVER_PORT:',2), ', uwsgi.node:',1) AS INT) AS SERVER_PORT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.node:',2), ', HTTP_PUBLISHER_KEY:',1)) AS NODE 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_PUBLISHER_KEY:',2), ', HTTP_DNT:',1)) AS PUBLISHER_KEY 
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', HTTP_DNT:',2), ', HTTP_HOST:',1) AS INT) AS HTTP_DNT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_HOST:',2), ', wsgi.multithread:',1)) AS HTTP_HOST 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.multithread:',2), ', HTTP_CACHE_CONTROL:',1)) AS MULTITHREAD 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_CACHE_CONTROL:',2), ', REQUEST_URI:',1)) AS CACHE_CONTROL 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'REQUEST_URI:',2), ', HTTP_ACCEPT:',1)) AS REQUEST_URI 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT:',2), ', wsgi.run_once:',1)) AS HTTP_ACCEPT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'wsgi.run_once:',2), ', REMOTE_PORT:',1)) AS RUN_ONCE 
,CAST(SPLIT_PART(SPLIT_PART(DATA, ', REMOTE_PORT:',2), ', HTTP_ACCEPT_LANGUAGE:',1) AS INT) AS REMOTE_PORT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT_LANGUAGE:',2), ', uwsgi.version:',1)) AS HTTP_ACCEPT_LANGUAGE 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'uwsgi.version:',2), ', CONTENT_TYPE:',1)) AS UWSGI_VERSION 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CONTENT_TYPE:',2), ', DOCUMENT_ROOT:',1)) AS CONTENT_TYPE 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'DOCUMENT_ROOT:',2), ', CSRF_COOKIE":',1)) AS DOCUMENT_ROOT 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'CSRF_COOKIE:',2), ', HTTP_ACCEPT_ENCODING":',1)) AS CSRF_COOKIET 
,TRIM(SPLIT_PART(SPLIT_PART(DATA, 'HTTP_ACCEPT_ENCODING:',2), ', HTTP_ACCEPT_ENCODING":',1)) AS HTTP_ACCEPT_ENCODING 
FROM raw_data; 

但是,当我运行插入查询,我得到一个错误 -

ERROR: invalid input syntax for integer: ""

。 我只有三个字段作为整数,但都有有效的值。为什么我得到这个错误?

+0

你为什么不只是转换是JSON的一个对象,做一个基于对象的插入到数据库(即ORM或类似的)? – 2015-04-06 03:03:35

+0

@MikeBrant你可以给我一个例子或一个我可以参考的页面吗? – Ambidextrous 2015-04-07 01:16:05

回答

1

如果您使用postgresql> = 9.3,则可以直接使用JSON函数来填充记录。 (json_populate_record) functions-json

INSERT INTO filtered_data 
SELECT * FROM json_populate_record(null::filtered_data, DATA); 
+0

该查询有效,但插入表中的数据是垃圾值。 – Ambidextrous 2015-04-07 01:17:12

+0

在我的示例中,DATA是与一行对应的json对象。 DATA对象键必须与列名称匹配。如果你想基于一个json对象数组来填充整个表,请改用json_populate_recordset。 'INSERT INTO filtered_data SELECT * FROM json_populate_recordset(null :: filtered_data,DATA);' – mathieu 2015-04-07 06:10:02

+0

感谢您的回复。是的,我这样做,但输出是意想不到的。你能看到这个 - http://stackoverflow.com/questions/29482334/inserting-json-data-to-postgresql-query-successful-but-output-is-unexpected – Ambidextrous 2015-04-07 12:55:16

相关问题