2017-06-17 180 views
1

我从 Elastic 获取了经过过滤的 JSON 数据,它在 R 中是一个多层嵌套列表。我想在 RStudio 中把这个嵌套列表转换为二维结构(矩阵或数据框),以便进行数据分析。将嵌套列表转换为矩阵

我已经尝试过网上的多种建议(plyr、rbind、unlist、tidyjson),但都没有成功。我急需您的帮助。结构

例子:

> str(RAW2) 
List of 10 
$ :List of 5 
    ..$ _index: chr "xml-search-2017.06.12" 
    ..$ _type : chr "xml" 
    ..$ _id : chr "76595087100_1" 
    ..$ _score: num 13.5 
    ..$ fields:List of 3 
    .. ..$ check_in_date :List of 1 
    .. .. ..$ : chr "2017-06-20T00:00:00.000Z" 
    .. ..$ check_out_date:List of 1 
    .. .. ..$ : chr "2017-06-23T00:00:00.000Z" 
    .. ..$ ts_start  :List of 1 
    .. .. ..$ : chr "2017-06-12T20:11:07.348Z" 
$ :List of 5 
    ..$ _index: chr "xml-search-2017.06.12" 
    ..$ _type : chr "xml" 
    ..$ _id : chr "76595087300_1" 
    ..$ _score: num 13.5 
    ..$ fields:List of 3 
    .. ..$ check_in_date :List of 1 
    .. .. ..$ : chr "2017-06-20T00:00:00.000Z" 
    .. ..$ check_out_date:List of 1 
    .. .. ..$ : chr "2017-06-23T00:00:00.000Z" 
    .. ..$ ts_start  :List of 1 
    .. .. ..$ : chr "2017-06-12T20:11:07.060Z" 

谢谢

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76595087100_1", `_score` = 13.457847, fields = structure(list(
     check_in_date = list("2017-06-20T00:00:00.000Z"), check_out_date = list(
      "2017-06-23T00:00:00.000Z"), ts_start = list("2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595087300_1", `_score` = 13.457847, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
     check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:11:07.060Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594844800_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
     check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:11:03.445Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597264600_1", `_score` = 13.455816, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
     check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:13:15.005Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76599762900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
     check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:14:37.454Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76595493900_1", `_score` = 13.455723, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
     check_out_date = list("2017-06-23T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:11:07.348Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76597065400_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
     check_out_date = list("2017-06-16T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:13:14.994Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76594777600_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
     check_out_date = list("2017-06-22T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:11:03.440Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76572404700_1", `_score` = 13.169026, 
    fields = structure(list(check_in_date = list("2017-06-13T00:00:00.000Z"), 
     check_out_date = list("2017-06-14T00:00:00.000Z"), ts_start = list(
      "2017-06-12T19:53:56.580Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76598481000_1", `_score` = 12.763965, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
     check_out_date = list("2017-06-28T00:00:00.000Z"), ts_start = list(
      "2017-06-12T20:14:37.452Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields"))) 

集2:

list(structure(list(`_index` = "xml-search-2017.06.12", `_type` = "xml", 
    `_id` = "76452356700_1", `_score` = 2.390721, fields = structure(list(
     check_in_date = list("2017-06-28T00:00:00.000Z"), check_out_date = list(
      "2017-07-02T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:21.311Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452363400_3", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-20T00:00:00.000Z"), 
     check_out_date = list("2017-06-30T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:21.235Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452375900_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-01T00:00:00.000Z"), 
     check_out_date = list("2017-09-03T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:30.092Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452377300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-22T00:00:00.000Z"), 
     check_out_date = list("2017-06-24T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:31.633Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452383100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rodeway Inn South Miami", 
     "East Miami", "Holiday Inn Port Of Miami", "Mayfair Hotel & Spa", 
     "Intercontinental Miami", "Marriott Biscayne Bay", "Eb Hotel Miami", 
     "Doubletree Grand Biscayne Bay", "Hotel Beaux Arts Miami", 
     "Cambria Suites Miami Airport", "Epic Miami, A Kimpton Hotel", 
     "The Ritz-Carlton Coconut Grove", "Quality Inn Miami Airport", 
     "Hilton Miami Downtown", "Conrad Miami", "Miccosukee Resort & Gaming", 
     "Courtyard Downtown", "Jw Marriott Marquis Miami", "Miami Marriott Dadeland", 
     "Courtyard Miami Coral Gables"), check_in_date = list(
     "2017-08-26T00:00:00.000Z"), check_out_date = list("2017-08-29T00:00:00.000Z"), 
     ts_start = list("2017-06-12T18:19:30.198Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452394200_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Rosen Plaza"), 
     check_in_date = list("2017-06-24T00:00:00.000Z"), check_out_date = list(
      "2017-06-27T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:31.672Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452409700_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Jw Marriott Hotel Mumbai Sahar"), 
     check_in_date = list("2018-03-12T00:00:00.000Z"), check_out_date = list(
      "2018-03-16T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:46.007Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452420500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Holiday Inn Express Troisdorf"), 
     check_in_date = list("2017-06-25T00:00:00.000Z"), check_out_date = list(
      "2017-06-28T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.676Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452421700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-23T00:00:00.000Z"), 
     check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:40.932Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452424300_2", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Aston Balikpapan"), 
     check_in_date = list("2017-07-05T00:00:00.000Z"), check_out_date = list(
      "2017-07-06T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.293Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425100_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("H10 Duque De Loule"), 
     check_in_date = list("2017-07-28T00:00:00.000Z"), check_out_date = list(
      "2017-07-30T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:42.594Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-25T00:00:00.000Z"), 
     check_out_date = list("2017-06-26T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:42.719Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452425600_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
     check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:42.748Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452427300_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
     check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:43.154Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428100_10", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-11T00:00:00.000Z"), 
     check_out_date = list("2017-09-24T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:43.345Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452428800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Ibis Istanbul City West"), 
     check_in_date = list("2017-06-30T00:00:00.000Z"), check_out_date = list(
      "2017-07-01T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:43.761Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("The Rani Hotel And Spa"), 
     check_in_date = list("2017-07-07T00:00:00.000Z"), check_out_date = list(
      "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:45.460Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452431700_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-17T00:00:00.000Z"), 
     check_out_date = list("2017-06-20T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:45.642Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434500_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Cloitre St Louis", 
     "De L'Horloge", "Hotel D'Europe", "Danieli", "Bristol", 
     "Kyriad Palais Des Papes", "Regina"), check_in_date = list(
     "2017-06-24T00:00:00.000Z"), check_out_date = list("2017-06-25T00:00:00.000Z"), 
     ts_start = list("2017-06-12T18:19:47.037Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452434700_2", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-08T00:00:00.000Z"), 
     check_out_date = list("2017-09-20T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:47.086Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452447400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-09-25T00:00:00.000Z"), 
     check_out_date = list("2017-09-30T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:51.056Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452451400_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-07-01T00:00:00.000Z"), 
     check_out_date = list("2017-07-04T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:40.306Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452500_1", `_score` = 2.390721, 
    fields = structure(list(check_in_date = list("2017-06-18T00:00:00.000Z"), 
     check_out_date = list("2017-06-19T00:00:00.000Z"), ts_start = list(
      "2017-06-12T18:19:52.461Z")), .Names = c("check_in_date", 
    "check_out_date", "ts_start"))), .Names = c("_index", "_type", 
"_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452452800_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Best Western Grand West'S Villas"), 
     check_in_date = list("2017-07-12T00:00:00.000Z"), check_out_date = list(
      "2017-07-14T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:40.753Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields")), structure(list(`_index` = "xml-search-2017.06.12", 
    `_type` = "xml", `_id` = "76452453600_1", `_score` = 2.390721, 
    fields = structure(list(hotel_name = list("Athens Status Suites", 
     "Ambrosia Hotel & Suites", "A For Athens", "Athenswas", 
     "Kimon Athens Hotel", "Chic", "Athinais", "Novus City Hotel"), 
     check_in_date = list("2017-08-12T00:00:00.000Z"), check_out_date = list(
      "2017-08-25T00:00:00.000Z"), ts_start = list("2017-06-12T18:19:54.035Z")), .Names = c("hotel_name", 
    "check_in_date", "check_out_date", "ts_start"))), .Names = c("_index", 
"_type", "_id", "_score", "fields"))) 
+0

请阅读[此链接](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example)(如何给出可重复的 R 示例),并相应地编辑您的问题 – Sotos

+0

@Sotos,完成,谢谢! –

+0

请用 `dput()` 输出您的示例数据,并将结果复制/粘贴到问题中(请参阅 `?dput`)。 –

回答

3

这个方法还不算十分简洁,但可以作为一个选项:

library(tidyverse)

# Build one row per search hit. Top-level entries (`_index`, `_type`, `_id`,
# `_score`) are kept as they are, and the entries nested under `fields` are
# lifted up next to them.
#
# A field can be absent from a hit or hold several values (e.g. `hotel_name`
# in the second data set), which makes a plain flatten-and-unnest fail. Each
# field is therefore reduced to a single value first: several values are
# joined with "; ", and an empty field becomes NA. Fields that a hit lacks
# altogether are filled with NA when the rows are bound together.
df <- raw2 %>%
  map_df(function(hit) {
    top_level <- hit[names(hit) != "fields"]
    fields <- map(hit[["fields"]], function(values) {
      values <- unlist(values)
      if (length(values) == 0) {
        NA
      } else if (length(values) > 1) {
        paste(values, collapse = "; ")
      } else {
        values
      }
    })
    as_tibble(c(top_level, fields))
  })

df
#> # A tibble: 10 x 7 
#>     `_index` `_type`   `_id` `_score` 
#>     <chr> <chr>   <chr> <dbl> 
#> 1 xml-search-2017.06.12  xml 76595087100_1 13.45785 
#> 2 xml-search-2017.06.12  xml 76595087300_1 13.45785 
#> 3 xml-search-2017.06.12  xml 76594844800_1 13.45582 
#> 4 xml-search-2017.06.12  xml 76597264600_1 13.45582 
#> 5 xml-search-2017.06.12  xml 76599762900_1 13.45572 
#> 6 xml-search-2017.06.12  xml 76595493900_1 13.45572 
#> 7 xml-search-2017.06.12  xml 76597065400_1 13.16903 
#> 8 xml-search-2017.06.12  xml 76594777600_1 13.16903 
#> 9 xml-search-2017.06.12  xml 76572404700_1 13.16903 
#> 10 xml-search-2017.06.12  xml 76598481000_1 12.76397 
#> # ... with 3 more variables: check_in_date <chr>, check_out_date <chr>, 
#> # ts_start <chr> 
+0

'purrr'的'map *'函数看起来非常方便 – Sotos

+1

是的,我喜欢 'map_df'。用过它之后,我觉得 base R 的同类函数不能直接把结果简化为 data.frame,实在是一个很大的缺憾。 – alistaire

+0

@alistaire 看起来很有希望!对我提供的例子有效,但在另一个类似的子集上尝试时,我得到了一个错误。我已把新的子集附在问题正文中。会不会是因为有些记录的字段更多?(例如有些记录包含 x 个酒店名称) –

1

一个 base R 版本可以是:

# Flatten every hit into a named vector with unlist(), then stack the vectors
# as the rows of a data frame. Nested entries get dotted names from unlist()
# (e.g. `fields.check_in_date`); a field holding several values is spread
# over numbered names (`fields.hotel_name1`, `fields.hotel_name2`, ...).
#
# Hits do not always carry the same fields, and rbind() needs identical
# columns, so each hit is first padded with NA up to the union of all names.
# local() keeps the helper variables out of the global environment.
local({
  flat <- lapply(l1, unlist)
  all_cols <- unique(unlist(lapply(flat, names)))
  padded <- lapply(flat, function(hit) {
    # Indexing by a name the hit lacks yields NA, which pads the row.
    setNames(hit[all_cols], all_cols)
  })
  # stringsAsFactors = FALSE keeps text columns as character on R < 4.0 too.
  as.data.frame(do.call(rbind, padded), stringsAsFactors = FALSE)
})