如何从JSON文件目录向mongoDB添加多条记录？

我有大约一百万个JSON文件，保存在目录“D:/njs/nodetest1/imports/source1/”的许多子目录中，我想将它们导入到我的mongoDB数据库的“users”集合中。如何从JSON文件目录向mongoDB添加多条记录？

以下代码可以正确遍历文件系统。正如你所看到的，它读取目录中的每个项目；如果该项目是一个目录，就继续读取其中的每个项目。对于每个不是目录的项目，它会先对其执行一些操作，然后把包含结果对象的变量传给一个函数。

/**
 * Recursively walk a directory tree and pass every regular file to runOnFile.
 *
 * All filesystem calls are synchronous, so this returns only after the whole
 * tree below `path` has been visited.
 *
 * @param {string} path - Directory to scan. Entries are addressed as
 *     `path + '/' + entryName`.
 * @throws {Error} Whatever fs.readdirSync / fs.statSync throw (e.g. ENOENT).
 */
function traverseFS (path){
    const entries = fs.readdirSync(path);
    // for...of visits only the directory entries; for...in would also walk any
    // enumerable property that some library added to Array.prototype.
    for (const entryName of entries){
        const currentFile = path + '/' + entryName;
        const stats = fs.statSync(currentFile);
        if (stats.isFile()) {
            runOnFile(currentFile);
        } else if (stats.isDirectory()) {
            // Recurse only into real directories: sockets, FIFOs and devices
            // are neither files nor directories and would make readdirSync throw.
            traverseFS(currentFile);
        }
    }
}
// Start the walk at the root of the import tree.
var importRoot = "D:/njs/nodetest1/imports/source1/";
traverseFS(importRoot);

接下来，我对每个文件执行一些操作（见下文）。这段代码读取文件，将其解析为JSON对象，把该对象的两个属性读入变量，再用它们在变量“entry”中创建一个对象，并将该变量传递给另一个函数。

/**
 * Load one JSON file from disk, keep two of its fields, and pass the
 * resulting entry object to insertRecord.
 *
 * @param {string} currentFile - Path of the JSON file to read (as UTF-8).
 */
function runOnFile(currentFile){
    const parsed = JSON.parse(fs.readFileSync(currentFile, 'utf8'));
    // The source property is `recordTexts`; it is stored under `recordText`.
    insertRecord({
        recordID: parsed.recordID,
        recordText: parsed.recordTexts
    });
}

最后一个函数应当负责将数据插入到MongoDB中。我认为问题就出在这里。

/**
 * Insert a single record into the "users" collection.
 *
 * Opens a connection, inserts `entry` with insertOne (collection.insert is
 * deprecated in the driver), and closes the connection again.
 *
 * @param {Object} entry - Document to insert.
 * @param {function(?Error, Object=)} [done] - Optional callback invoked once
 *     with the connection/insert error or the insert result. When omitted,
 *     errors are written to console.error instead of being dropped.
 */
function insertRecord(entry, done){
    const MongoClient = mongodb.MongoClient;
    const MongoURL = 'mongodb://localhost:27017/my_database_name';
    const finish = typeof done === 'function' ? done : function (err) {
        if (err) {
            console.error(err);
        }
    };
    MongoClient.connect(MongoURL, function (connectErr, db) {
        if (connectErr) {
            // Without this guard `db` is undefined and db.collection() throws.
            return finish(connectErr);
        }
        db.collection('users').insertOne(entry, function (insertErr, result) {
            // Close on both success and failure so the connection never leaks.
            db.close();
            finish(insertErr, result);
        });
    });
}

我希望它能遍历整个文件结构，将JSON文件读取为对象，然后把这些对象发送到我的MongoDB中。相反，它只把第一个文件写入数据库，然后就停止/挂起了。

注：

我不想使用mongoimport，因为我不希望把这些文件中的所有数据都插入到我的MongoDB数据库中。不过，我并不拘泥于这种方法的任何细节。如果有其他解决方案，我愿意接受。
连接数据库本身没有问题。对于目录中的每个项目，这段代码都能成功创建一个“entry”对象并将其传递给insertRecord函数。换句话说，问题一定发生在insertRecord这一部分。但显然，它也可能是由流程中更早的某个环节引起的。
即使我加上错误处理，也没有任何错误产生。为了不影响代码片段的可读性，我在这篇帖子里省略了错误处理。

来源

2016-12-06 COMisHARD

不清楚您的runOnJson（）函数应该做什么——把JSON内容读进来再塞进Mongo吗？ –

这不完全是我问的。我问的是：runOnJson（）这个*函数*具体应该做什么？如果它只需要对文件运行mongoimport（例如示例中那样），那么你试过吗？ –

根据mongodb 2.2（当前最新版本）的文档，insert已被弃用：

DEPRECATED

Use insertOne, insertMany or bulkWrite

所以简单的答案是：把collection.insert([entry], ...)改成collection.insertOne(entry, ...)，可能就大功告成了。

至于详细的答案：你提到“大约一百万个JSON文件”，这种规模通常值得采用开销最小的完全异步方案。

示例代码中有2个（潜在的）瓶颈：

fs.readFileSync，这是阻塞操作
连接，插入记录和关闭数据库连接

两者都要执行“大约一百万次”。诚然，导入通常不会一遍又一遍地进行，并且（希望）不会在需要执行其他重要任务的计算机上执行。尽管如此，示例代码可以很容易地变得更加健壮。

考虑使用glob模块获得JSON文件的列表。

glob('imports/**/*.json', function(error, files) {...})

这为您提供了异步方式的完整文件列表。

然后考虑连接到数据库只有一次，一切都插入并关闭一次。

在大致保留你示例中相同步骤的前提下，我的建议如下：

var glob = require('glob'), 
    mongodb = require('mongodb'), 
    fs = require('fs'), 
    MongoClient = mongodb.MongoClient, 
    mongoDSN = 'mongodb://localhost:27017/my_database_name', 
    collection; // moved this to the "global" scope so we can do it only once 

/**
 * Validate a parsed JSON document and insert its recordID/recordText pair
 * into the shared `collection`.
 *
 * A field counts as missing only when it is null or undefined, so legitimate
 * falsy values such as a recordID of 0 are still inserted.
 *
 * @param {Object} json - Parsed file content; expected to carry `recordID`
 *     and `recordText`.
 * @param {function(?Error, Object=)} done - Node-style callback. Receives an
 *     Error when a field is missing, otherwise whatever insertOne reports.
 * @returns {*} The return value of insertOne or of `done`.
 */
function insertRecord(json, done) {
    // Tolerate a null/undefined document instead of throwing on property access.
    var source = json || {};
    var recordID = source.recordID;
    var recordText = source.recordText;

    // `== null` matches exactly null and undefined.
    if (recordID == null || recordText == null) {
        // Pass a real Error (not a bare string) so callers get a stack trace.
        return done(new Error('No recordID and/or recordText'));
    }

    // NOTE: insert was changed to insertOne
    return collection.insertOne({recordID: recordID, recordText: recordText}, done);
}

/**
 * Asynchronously read one JSON file, parse it, and pass the result to
 * insertRecord.
 *
 * @param {string} file - Path of the JSON file to read.
 * @param {function(?Error, Object=)} done - Node-style callback. Receives the
 *     read error, a parse error, or whatever insertRecord reports.
 */
function runOnFile(file, done) {
    // moved to be async
    fs.readFile(file, 'utf8', function(error, data) {
        if (error) {
            return done(error);
        }

        var json;
        try {
            json = JSON.parse(data);
        } catch (parseError) {
            // JSON.parse throws on malformed input; an exception escaping this
            // callback would be uncaught and abort the whole import.
            return done(new Error('Unable to parse JSON: ' + file + ' (' + parseError.message + ')'));
        }

        // Valid JSON can still be a falsy value such as `null`, `0` or `""`.
        if (!json) {
            return done(new Error('Unable to parse JSON: ' + file));
        }

        insertRecord(json, done);
    });
}

/**
 * Run runOnFile over every entry of `files`, one at a time and in order,
 * then call `done`.
 *
 * Per-file errors are logged with console.error and processing continues
 * with the next file. The `files` array is only read, never modified.
 *
 * @param {string[]} files - Paths to process.
 * @param {function()} done - Called once, with no arguments, after the last
 *     file has been handled (immediately when `files` is empty).
 */
function processFiles(files, done) {
    // Walk by index rather than files.shift(): shifting empties the caller's
    // array and a falsy entry would end the run early.
    var index = 0;

    function step() {
        if (index >= files.length) {
            return done();
        }

        var current = files[index];
        index += 1;

        runOnFile(current, function(error) {
            if (error) {
                console.error(error);
                // you may or may not want to stop here by throwing an Error
            }

            step();
        });
    }

    step();
}

// Entry point: connect once, collect every JSON file under imports/, insert
// them one after another through processFiles, then close the connection.
MongoClient.connect(mongoDSN, function(error, db) {
    if (error) {
        // Rethrow the driver's error itself; new Error(error) would turn it
        // into a string and lose its stack and properties.
        throw error;
    }

    collection = db.collection('users');

    // nodir: skip directories whose name happens to end in ".json"; reading
    // one as a file fails with EISDIR.
    glob('imports/**/*.json', {nodir: true}, function(globError, files) {
        if (globError) {
            // Release the connection before bailing out.
            db.close();
            throw globError;
        }

        processFiles(files, function() {
            console.log('all done');
            db.close();
        });
    });
});

注意：您可以先收集多个“entry”记录，再用insertMany批量插入以获得性能提升。不过我感觉实际插入的记录比描述的更复杂，如果处理不当，可能会带来内存问题。

来源

2016-12-09 12:05:47

这似乎很有帮助，我还没有完全消化。但是当我运行它时，得到了以下错误。我当然想自己调试，但还是先贴出来，以防问题其实很明显。 ||| “{ Error: EISDIR: illegal operation on a directory, read at Error (native) errno: -4068, code: 'EISDIR', syscall: 'read' } all done” – COMisHARD

会不会是因为其中有一个目录被当作文件读取了？如果你使用的是'glob'方法，这可能意味着该模式匹配到了一个目录。这就是为什么我使用'**/*.json'模式，它的意思是“在任何文件夹中，不论嵌套层级，所有以'.json'结尾的项目”（我的假设是所有文件实际上都带有'.json'扩展名） –

好吧，我仍然不知道该如何把目录地址填进glob(something, ...)的参数里。我的JSON文件所在位置的完整路径是：“D:\njs\nodetest1\imports\files”，在“files”目录中有大约100个子目录，每个子目录里都有.json文件。你的假设是对的，我只想要.json文件。但是，我该如何正确搜索该目录？你一直在帮我修改的脚本位于“D:\njs” – COMisHARD

只需把你的数据构造成一个大的对象数组，然后运行db.collection.insertMany。

来源

2016-12-14 08:45:07

太多的数据一次保存在RAM中。 – COMisHARD

我建议你使用Promise来实现：

const Bluebird = require('bluebird'); 
const glob = Bluebird.promisify(require('glob')); 
const mongodb = require('mongodb'); 
const fs = Bluebird.promisifyAll(require('fs')); 
const Path = require('path'); 
const MongoClient = mongodb.MongoClient; 

/**
 * Insert every *.json file found under `path` (including subdirectories)
 * into the "users" collection, reading and inserting up to 10 files
 * concurrently.
 *
 * @param {string} path - Root directory to search.
 * @param {string} mongoConnString - MongoDB connection string.
 * @returns {Promise} Resolves once all files are inserted and the connection
 *     is closed; rejects with the first read, parse or insert error.
 */
const insertMillionsFromPath = Bluebird.coroutine(function *(path, mongoConnString) {
    const db = yield MongoClient.connect(mongoConnString);
    try {
        const collection = db.collection('users');
        // The pattern stays relative and slash-only, anchored through `cwd`:
        // glob patterns must use forward slashes, but Path.join() emits
        // backslashes on Windows. `**` descends into subdirectories and
        // `nodir` keeps directories out of the result.
        const matches = yield glob("**/*.json", {cwd: path, nodir: true});
        const files = matches.map((match) => Path.join(path, match));
        yield Bluebird.map(
            files,
            Bluebird.coroutine(function *(filename) {
                console.log("reading", filename);
                const fileContent = yield fs.readFileAsync(filename, 'utf8');
                const obj = JSON.parse(fileContent);

                console.log("inserting", filename);
                yield collection.insertOne(obj);
            }),
            {concurrency: 10} // You can increase concurrency here
        );
    } finally {
        // Always release the connection, even when a file fails.
        yield db.close();
    }
});

// Run the import and report the outcome.
insertMillionsFromPath("./myFiles", "mongodb://localhost:27017/database")
    .then(() => console.log("OK"))
    .catch((err) => {
        // Report failures on stderr and exit non-zero so scripts can detect them.
        console.error("ERROR", err);
        process.exitCode = 1;
    });

为了工作，你需要安装以下软件包：

npm install --save mongodb bluebird glob

此外，你需要使用Node.js 6或更高版本，否则需要转译（transpile）你的JavaScript（因为用到了function *()生成器）。

来源

2016-12-15 01:01:54

如何从JSON文件目录向mongoDB添加多条记录？

回答

相关问题