I’m trying to use stream-json to read a zip, unzip it, and then write it to file. I don’t think I understand how to use the library.
Based on the stream-json documentation, they have this example:
```js
const {chain} = require('stream-chain');

const {parser} = require('stream-json');
const {pick} = require('stream-json/filters/Pick');
const {ignore} = require('stream-json/filters/Ignore');
const {streamValues} = require('stream-json/streamers/StreamValues');

const fs = require('fs');
const zlib = require('zlib');

const pipeline = chain([
  fs.createReadStream('sample.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}),
  ignore({filter: /\b_meta\b/i}),
  streamValues(),
  data => {
    const value = data.value;
    // keep data only for the accounting department
    return value && value.department === 'accounting' ? data : null;
  }
]);

let counter = 0;
pipeline.on('data', () => ++counter);
pipeline.on('end', () =>
  console.log(`The accounting department has ${counter} employees.`));
```
However, I don't want to count anything; I just want to write to a file. Here is what I have that works:
```js
function unzipJson() {
  const zipPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json.zip');
  const jsonPath = Path.resolve(__dirname, 'resources', 'AllPrintings.json');

  console.info('Attempting to read zip');
  return new Promise((resolve, reject) => {
    let error = null;
    Fs.readFile(zipPath, (err, data) => {
      error = err;
      if (!err) {
        const zip = new JSZip();
        zip.loadAsync(data).then((contents) => {
          Object.keys(contents.files).forEach((filename) => {
            console.info(`Writing ${filename} to disk...`);
            zip.file(filename).async('nodebuffer').then((content) => {
              Fs.writeFileSync(jsonPath, content);
            }).catch((writeErr) => { error = writeErr; });
          });
        }).catch((zipErr) => { error = zipErr; });
        resolve();
      } else if (error) {
        console.log(error);
        reject(error);
      }
    });
  });
}
```
However, I can't easily add any processing to this, so I wanted to replace it with stream-json. This is my partial attempt, as I don't know how to finish it:
```js
function unzipJson() {
  const zipPath = Path.resolve(__dirname, 'resources', 'myfile.json.zip');
  const jsonPath = Path.resolve(__dirname, 'resources', 'myfile.json');

  console.info('Attempting to read zip');

  const pipeline = chain([
    Fs.createReadStream(zipPath),
    zlib.createGunzip(),
    parser(),
    Fs.createWriteStream(jsonPath),
  ]);

  // use the chain, and save the result to a file
  pipeline.on(/* what goes here? */);
}
```
Later on I intend to add extra processing of the json file(s), but I want to learn the basics before I start throwing in extra functionality.
I can't produce a minimal example, unfortunately, as I don't know what goes into the pipeline.on function. I'm trying to understand what I should do, not what I've done wrong.
I also looked at the related stream-chain library, which has an example that ends like so:
```js
// use the chain, and save the result to a file
dataSource.pipe(chain).pipe(fs.createWriteStream('output.txt.gz'));
```
But at no point does the documentation explain where dataSource comes from, and I think my chain creates its own by reading the zip from file?
How am I supposed to use these streaming libraries to write to file?
Answer
> I don't want to count anything, I just want to write to file
In that case, you'll need to convert the token/JSON data stream back into a text stream that you can write to a file. You can use the library's Stringer for that. Its documentation also contains an example that is more in line with what you want to do:
```js
const {chain} = require('stream-chain');
const {parser} = require('stream-json');
const {pick} = require('stream-json/filters/Pick');
const {stringer} = require('stream-json/Stringer');
const fs = require('fs');
const zlib = require('zlib');

chain([
  fs.createReadStream('data.json.gz'),
  zlib.createGunzip(),
  parser(),
  pick({filter: 'data'}), // omit this if you don't want to do any processing
  stringer(),
  zlib.createGzip(),      // omit this if you want to write an unzipped result
  fs.createWriteStream('edited.json.gz')
]);
```
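Two follow-up notes, since the question also asks what goes into pipeline.on and where dataSource comes from.

Nothing has to go into pipeline.on just to write the file: the chain starts flowing on its own, and the write stream at the end receives everything. You only attach listeners to know when the pipeline has finished or to handle errors. Here is a minimal sketch of unzipJson adapted to this approach; it's my own completion rather than something from the docs, and it assumes a gzipped input (myfile.json.gz), because zlib.createGunzip() understands gzip/deflate, not .zip archives:

```js
const { chain } = require('stream-chain');
const { parser } = require('stream-json');
const { stringer } = require('stream-json/Stringer');
const Fs = require('fs');
const Path = require('path');
const zlib = require('zlib');

function unzipJson() {
  const gzPath = Path.resolve(__dirname, 'resources', 'myfile.json.gz');
  const jsonPath = Path.resolve(__dirname, 'resources', 'myfile.json');

  return new Promise((resolve, reject) => {
    const output = Fs.createWriteStream(jsonPath);
    const pipeline = chain([
      Fs.createReadStream(gzPath),
      zlib.createGunzip(), // gzip only; see the note below for real .zip archives
      parser(),            // JSON text -> token stream
      stringer(),          // token stream -> JSON text again
      output,
    ]);
    // stream-chain re-emits errors from its components on the chain itself
    pipeline.on('error', reject);
    // 'finish' fires on the write stream once everything is flushed to disk
    output.on('finish', resolve);
  });
}
```

Since your actual input is a .zip archive rather than a gzip file, you'd have to swap zlib.createGunzip() for a streaming unzip step. One option (an assumption on my part, not part of stream-json) is the unzipper package, whose ParseOne() streams out the decompressed contents of the first entry in the archive:

```js
const unzipper = require('unzipper'); // assumed third-party library, not stream-json

const pipeline = chain([
  Fs.createReadStream(zipPath),
  unzipper.ParseOne(), // emits the contents of the first zip entry as a stream
  parser(),
  stringer(),
  Fs.createWriteStream(jsonPath),
]);
```

As for dataSource in the stream-chain example: the library doesn't create it, it's just any readable stream (for instance fs.createReadStream('input.txt')) that you pipe into the chain. Your chain doesn't need one, because its first element is already a readable stream acting as the source.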