2016-04-13 121 views
3

我想读取具有json内容的文件并将其转换为基于某些字段的表格数据。如何将文件中的json条目转换为数据框?

该文件包括这样的内容:

{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"} 

我需要把文件中每一行 payloadData 部分里的 timestamp、source、host 和 status 字段转换成 R 中的数据框。

我已经试过这样:

# Load rjson and parse the JSON file into an R object.
library(rjson)
d <- fromJSON(file = "file.txt")

# Dump of the parsed object `d`: a single named list holding one record.
# The fields of interest (timestamp, status, source, host) sit in the nested
# `payloadData` list; `senderHost` and `correlationAttributes` are NULL.
dput(d) 
structure(list(senderDateTimeStamp = "2016/04/08 10:03:18", senderHost = NULL, 
        senderAppcode = "web", senderUsecase = "appinternalstats_prod", 
        destinationTopic = "web_appinternalstats_realtimedata_topic", 
        correlatedRecord = FALSE, needCorrelationCacheCleanup = FALSE, 
        needCorrelation = FALSE, correlationAttributes = NULL, correlationRecordCount = 0, 
        correlateTimeWindowInMills = 0, lastCorrelationRecord = FALSE, 
        realtimeESStorage = TRUE, receiverDateTimeStamp = 1460124283554, 
        payloadData = structure(list(timestamp = "2016-04-08T10:03:18.244", 
               status = "get", source = "MSG1", 
               region = "", evetid = "", osareqid = "", basis = "", 
               pricingdate = "", content = "", msgname = "", recipient = "", 
               objid = "", idlreqno = "", host = "web01", webservermember = "webSingleton"), 
              .Names = c("timestamp", 
              "status", "source", "region", "evetid", 
              "osareqid", "basis", "pricingdate", "content", "msgname", 
              "recipient", "objid", "idlreqno", "host", "webservermember" 
               )), payloadDataText = "", key = "web:appinternalstats_prod", 
        destinationTopicName = "web_appinternalstats_realtimedata_topic", 
        hdfsPath = "web/appinternalstats_prod", esindex = "web", 
        estype = "appinternalstats_prod", useCase = "appinternalstats_prod", 
        appCode = "web"), .Names = c("senderDateTimeStamp", "senderHost", 
               "senderAppcode", "senderUsecase", "destinationTopic", "correlatedRecord", 
               "needCorrelationCacheCleanup", "needCorrelation", "correlationAttributes", 
               "correlationRecordCount", "correlateTimeWindowInMills", "lastCorrelationRecord", 
               "realtimeESStorage", "receiverDateTimeStamp", "payloadData", 
               "payloadDataText", "key", "destinationTopicName", "hdfsPath", 
               "esindex", "estype", "useCase", "appCode")) 

任何想法如何将json条目的payloadData部分转换为数据框?

+1

运行代码时报错:Error in structure(list(timestamp = "2016-04-08T10:03:18.244", status = "get", : 'names' attribute [16] must be the same length as the vector [15] – user1357015

+0

@ user1357015相同,我已更新了工作dput输出的帖子 – user1471980

回答

1

这可能是你想要的东西:

library(rjson)

# Parsing the whole file with fromJSON(file = ...) yields a single named list
# of one record's fields; looping over that list passes atomic values to the
# extractor and fails with "$ operator is invalid for atomic vectors".
# Parse every line as its own record instead.
# NOTE(review): assumes the file holds one JSON object per line -- confirm.
json_lines <- readLines("file.txt", warn = FALSE)
json_lines <- json_lines[nzchar(trimws(json_lines))]  # ignore blank lines
d <- lapply(json_lines, function(line) fromJSON(json_str = line))

# Fetch one field of a record's payloadData; NA when the field is absent or
# null so that every record still produces a complete row.
payload_field <- function(record, name) {
  value <- record[["payloadData"]][[name]]
  if (is.null(value)) NA_character_ else value
}

# Build a one-row data frame per record and stack them into the result.
myDf <- do.call("rbind", lapply(d, function(x) {
  data.frame(
    TimeStamp = payload_field(x, "timestamp"),
    Source = payload_field(x, "source"),
    Host = payload_field(x, "host"),
    Status = payload_field(x, "status"),
    stringsAsFactors = FALSE
  )
}))
+0

我得到了这个错误:Error: unexpected '}' in: "Source = d$payloadData$source, Status = d$payloadData$status}" > ) Error: unexpected ')' in ")" > ) Error: unexpected ')' in ")" – user1471980

+1

抱歉,漏掉了一个“)”。现在应该可以运行了。 – Psidom

+0

报错:Error in d$payloadData : $ operator is invalid for atomic vectors – user1471980

1

可以考虑使用 tidyjson 包:

library(tidyjson) 
library(magrittr) 

# One sample record kept as raw JSON text. The tidyjson calls that follow are
# fed this character string directly rather than an already parsed R list.
json <- '{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}' 

# List the top-level keys of the JSON document, one row per key.
gather_keys(json)

# head() of above 
# document.id     key 
# 1   1 senderDateTimeStamp 
# 2   1   senderHost 
# 3   1   senderCode 
# 4   1  senderUsecase 
# 5   1 destinationTopic 
# 6   1 correlatedRecord 

# Step into the nested "payloadData" object, list its keys, and attach each
# key's value as a string column.
append_values_string(
  gather_keys(
    enter_object(json, "payloadData")
  )
)

# head() of above 
# document.id   key     string 
# 1   1 timestamp 2016-04-08T10:03:18.244 
# 2   1  status      get 
# 3   1  source     MSG1 
# 4   1  ITEM     TEST1 
# 5   1  basis       
# 6   1 pricingdate       
+0

@JasonAiskalns,json 数据保存在一个文件中。我先把它读入一个对象:data <- fromJSON(file = "file.txt"),然后运行你的代码时收到这个错误:Error in UseMethod("as.tbl_json") : no applicable method for 'as.tbl_json' applied to an object of class "list" – user1471980

相关问题