Node.js client library for controlling OpenRefine.
- [] upload, apply operations, download results, delete project
- pipe
- CLI tool
var openrefine = require('openrefine')
// another server; same usage
var server = openrefine.server('http://localhost:3333')
// fetch metadata for all projects
openrefine
.projects()
.then(project_metadata => ...)
Project metadata format:
{
"[project_id]": {
"name": "[project_name]",
"created": "[project_creation_time]",
"modified": "[project_modification_time]",
"customMetadata": {}
},
...[More projects]...
}
Create a project and clean up some data:
var project = openrefine
.create('data_cleanup_project') // calling .create() with no argument auto-generates a project name
.accept('csv')
.accept({
separator: ',',
ignoreLines: 1
})
.expose('csv')
.keep(true) // keep data after end() or pipe; by default the data is not kept
.use([
{
"op": "core/column-split",
"description": "Split column DATE by separator",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "DATE",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "-",
"regex": false,
"maxColumns": 0
}
])
.use(customCleanupAddress()) // customCleanupAddress() returns an array of operations
project
.load('input.csv')
.end(function (data) {
// ...
})
.then(() => project.destroy())
Or use the stream interface:
fs.createReadStream('input.csv')
.pipe(project)
.pipe(fs.createWriteStream('output.csv'))
A project may have some internal state (project metadata such as name and ID, previously imported data, etc.). To open an existing project, use its numeric OpenRefine project ID:
server.open(1234567980)
Delete all data in a project:
project.clean()
Destroy a project after use:
project.destroy()
- Refine API and implementations in Python and in Ruby.