MongoDB is simple and capable, and we know many of you are probably users. But it still requires running and maintaining software, and it doesn’t come without its limitations. We challenge you to give Orchestrate a try and see the difference for yourself: it’s easier to get started with, scales with your data, offers more storage and query options, and was built by a team that knows NoSQL.

To make the transition painless, we’ve laid out step by step how to move data from MongoDB to Orchestrate. Since both MongoDB and Orchestrate store JSON, moving from one to the other is pretty easy.

Export from Mongo

Exporting from Mongo is pretty simple: we can use the mongoexport command-line tool to export a collection to a JSON file. For example data, I’m going to use the Zip Code sample file MongoDB provides. If you want to use the same sample data, you can import it into Mongo with:

mongoimport -d sample -c zips --file zips.json
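If you don’t already have zips.json, MongoDB hosted the sample file at media.mongodb.org when this post was written (the URL may have moved since), so something like this should fetch it:

curl -O http://media.mongodb.org/zips.json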

The mongoexport command takes a database name, collection name, and a file to output to.

mongoexport -d sample -c zips -o output.json

Now we have a file with one JSON record per line:

{ "city" : "AGAWAM", "loc" : [ -72.622739, 42.070206 ], "pop" : 15338, "state" : "MA", "_id" : "01001" }
{ "city" : "CUSHMAN", "loc" : [ -72.51564999999999, 42.377017 ], "pop" : 36963, "state" : "MA", "_id" : "01002" }
...

Import into Orchestrate

To import into Orchestrate we will read the output.json file line by line and add each record into Orchestrate.

Create a Collection

In the Orchestrate Dashboard, create a new collection for the data. I’m going to call mine “sample”.

Set up

We’ll start by installing the Orchestrate module:

npm install orchestrate --save

Then create a new import.js file and require the module, passing in an API key from your Orchestrate Dashboard:

var db = require('orchestrate')('API KEY');

Save records

Now let’s create a function that will save each record into our Orchestrate database. It takes the JSON string for one line, parses it, and calls Orchestrate’s PUT operation to save the record into our collection.

function saveRecord (record) {  
    // parse the JSON for this line
    try {
        var data = JSON.parse(record);
    } catch (e) {
        console.log('Failed to read line', e);
        return;
    }

    // get the key out of the json
    var key = data._id;

    return db.put('sample', key, data)
    .then(function () {
        console.log('Saved: ', key);
        return true;
    });
}
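To sanity-check the function before wiring up the file reader, you can feed it a single line by hand. This is a throwaway snippet, not part of the final script; it just reuses the first record from our sample output:

saveRecord('{ "city" : "AGAWAM", "pop" : 15338, "state" : "MA", "_id" : "01001" }')
.then(function () {
    // if this logs, the record landed in the "sample" collection
    console.log('test record saved');
});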

Reading the JSON file line by line

We want to read each line and save it into Orchestrate. To do this, we use the fs, stream, and readline modules.

var fs = require('fs');  
var readline = require('readline');  
var stream = require('stream');

var instream = fs.createReadStream('./output.json');  
var outstream = new stream;  
var rl = readline.createInterface(instream, outstream);

rl.on('line', function(line) {

});

This reads the file ./output.json as a stream and calls the function once for each line.
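Before doing any real work, a quick line count is an easy way to confirm the reader sees every record. This throwaway snippet just counts lines and reports the total when the 'close' event fires:

var count = 0;
rl.on('line', function (line) {
    count++;
});
rl.on('close', function () {
    console.log('Read ' + count + ' lines');
});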

Async

We need to queue the PUT operations to Orchestrate because we can’t make thousands of calls at once. We can use the async module to queue the operations and wait until one is finished before sending the next.

npm install async --save

And require it in our code:

var async = require('async');

Then we can create a new queue. This takes two parameters: first, the worker function that will be called for each task; second, the number of concurrent operations. I chose 2 for this example, but you could try more.

Inside the worker function, we’ll call saveRecord. The async module requires us to call the callback when our operation is done, so we’ll use then() to invoke it whether the save succeeds or fails; if we only called back on success, a single rejected PUT would stall the queue.

var q = async.queue(function (task, callback) {  
    var saved = saveRecord(task.data);
    // saveRecord returns nothing for unparseable lines, so skip those
    if (!saved) return callback();
    saved.then(function () { callback(); },
               function (err) { console.log('Failed:', err); callback(); });
}, 2);

Then all we need to do is push each line onto the queue.

rl.on('line', function(line) {  
    q.push({data: line});
});
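If you want to know when the import has finished, the queue exposes a drain callback that fires whenever the queue empties. In the async releases current when this was written you assign it directly (newer releases expose it as a method), and note it can fire more than once if the saves outpace the file reader:

q.drain = function () {
    // all queued saves have completed (so far)
    console.log('Queue drained');
};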

Final Code

When we run this, it will iterate over every line and queue each one to be saved into our Orchestrate collection, keeping two save operations in flight at a time.

var db = require('orchestrate')('API KEY');

var fs = require('fs');  
var async = require('async');  
var readline = require('readline');  
var stream = require('stream');

function saveRecord (record) {  
    // parse the JSON for this line
    try {
        var data = JSON.parse(record);
    } catch (e) {
        console.log('Failed to read line', e);
        return;
    }

    // get the key out of the json
    var key = data._id;

    return db.put('sample', key, data)
    .then(function () {
        console.log('Saved: ', key);
        return true;
    });
}

var instream = fs.createReadStream('./output.json');  
var outstream = new stream;  
var rl = readline.createInterface(instream, outstream);

var q = async.queue(function (task, callback) {  
    var saved = saveRecord(task.data);
    // saveRecord returns nothing for unparseable lines, so skip those
    if (!saved) return callback();
    saved.then(function () { callback(); },
               function (err) { console.log('Failed:', err); callback(); });
}, 2);

rl.on('line', function(line) {  
    q.push({data: line});
});

And to run it:

node import.js
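To spot-check the import, you can fetch one of the records back with the client’s get method. This little snippet assumes the same collection name and uses a key we saw in the sample data:

var db = require('orchestrate')('API KEY');

db.get('sample', '01001')
.then(function (res) {
    console.log(res.body);
});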

Here’s the code in a Gist

Mongo to Orchestrate with Go

The process is very similar in other languages. Here’s equivalent code in Go. This version takes files or directories as command-line arguments, imports each file into a collection named after the file, and expects each record’s _id to be a Mongo ObjectId, which mongoexport writes as { "$oid": "..." }.

package main

import (
    "bufio"
    "bytes"
    "encoding/json"
    "io"
    "io/ioutil"
    "log"
    "os"
    "path"
    "path/filepath"
    "sync"

    client "github.com/orchestrate-io/orchestrate-go-client"
)

var (
    c = client.NewClient("API KEY")
    w sync.WaitGroup
)

func main() {
    if len(os.Args) == 1 {
        log.Fatalf("You must provide a directory or file to import.\n")
    }
    for _, filename := range os.Args[1:] {
        if s, err := os.Stat(filename); err != nil {
            if os.IsNotExist(err) {
                log.Fatalf("File does not exist: %s\n", filename)
            } else {
                log.Fatalf("Error reading %s: %#v\n", filename, err)
            }
        } else if s.IsDir() {
            if files, err := ioutil.ReadDir(filename); err != nil {
                log.Fatalf("Error reading %s: %#v\n", filename, err)
            } else {
                for _, file := range files {
                    w.Add(1)
                    go importFile(path.Join(filename, file.Name()))
                }
            }
        } else {
            w.Add(1)
            go importFile(filename)
        }
    }

    w.Wait()
}

func importFile(filename string) {
    defer w.Done()

    log.Printf("Importing %v ...", filename)

    file, err := os.Open(filename)
    if err != nil {
        log.Printf("Error: %v\n", err)
        return
    }
    defer file.Close()

    reader := bufio.NewReaderSize(file, 1024*1024)
    base := filepath.Base(filename)
    indexingConflicts := int64(0)

    for {
        data, err := reader.ReadBytes('\n')
        if err == io.EOF {
            // stop at end of file instead of looping forever
            log.Printf("Done importing %v (with %v indexing conflicts)", filename, indexingConflicts)
            return
        } else if err != nil {
            log.Panicf("Scanner error: %v\n", err)
        }

        jsonData := make(map[string]interface{})
        if err := json.Unmarshal(data, &jsonData); err != nil {
            log.Panicf("Error unmarshaling json in %s: %#v\n", filename, err)
        }

        if id, ok := jsonData["_id"]; !ok {
            log.Panicf("[_id] element not found in %s\n", filename)
        } else if idMap, ok := id.(map[string]interface{}); !ok {
            // guard against records whose _id is not an ObjectId document
            log.Panicf("[_id] element is not an object in %s\n", filename)
        } else if oid, ok := idMap["$oid"]; !ok {
            log.Panicf("[_id][$oid] element not found in %s\n", filename)
        } else if oidStr, ok := oid.(string); !ok {
            log.Panicf("[_id][$oid] element is not a string in %s\n", filename)
        } else if err := c.Put(base, oidStr, bytes.NewReader(data)); err != nil {
            if oerr, ok := err.(*client.OrchestrateError); ok && oerr.Status == "409 Conflict" {
                indexingConflicts++
            } else {
                log.Panicf("Error importing data: %#v\n", err)
            }
        }
    }
}
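To run it, install the client library and point the program at the export. import.go is just a hypothetical file name here, and the program accepts files or whole directories:

go get github.com/orchestrate-io/orchestrate-go-client
go run import.go output.json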

Photo Credit: JD Hancock via Compfight cc