Enable cryptokitties data from nonfungible.com (#1680)

* enable cryptokitties

    Recall that nonfungible.com sent us an initial data dump so that we
    wouldn't have to pull everything from the API. And recall that we're
    storing that initial dump on S3. Finally, recall that the cryptokitties
    data was particularly voluminous (~1GB), which doesn't easily transfer to
    and from S3.

    These changes provide a CLI script to partition that data (or any such data
    from nonfungible.com) into a set of smaller files, and a scraping script
    that seamlessly recombines those partitions for loading into the database.
    (A minimal sketch of that recombination step follows after this list.)

* add chainbreakers, chibifighters & mlbcryptobaseball
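
The recombination side mentioned in the first bullet is not part of the diff shown below. What follows is a minimal sketch of that step, not the actual scraping script from this commit: it assumes only the `sales_summary_${publisher}${N}.json` naming convention produced by the partitioning script, and it reads the chunk files from local disk, whereas the real scraping script pulls them from S3.

    // Minimal sketch: recombine partitioned nonfungible.com dump files.
    // Hypothetical helper; not the scraping script shipped in this commit.
    import { existsSync, readFileSync } from 'fs';

    function readRecombinedDump(publisher: string): object[] {
        let trades: object[] = [];
        // Read chunk files in order until one is missing, mirroring the
        // zero-based numbering produced by the partitioning script below.
        for (let chunkIndex = 0; ; chunkIndex++) {
            const chunkFilename = `sales_summary_${publisher}${chunkIndex}.json`;
            if (!existsSync(chunkFilename)) {
                break;
            }
            const chunk: object[] = JSON.parse(readFileSync(chunkFilename).toString());
            trades = trades.concat(chunk);
        }
        return trades;
    }

    // Example usage: const allTrades = readRecombinedDump('cryptokitties');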
Author: F. Eugene Aumson
Date: 2019-03-15 10:50:59 -04:00
Committed by: GitHub
Parent: 7f5a3f12ca
Commit: 4bf311a282
2 changed files with 183 additions and 65 deletions

New file: partition_nonfungible_dot_com_dump.ts

@@ -0,0 +1,48 @@
/**
 * Needed because we store the initial dump of trades in S3, and some projects
 * (namely cryptokitties) have dumps that are too big to be transferred easily
 * as one big file to and from S3. This script breaks apart a dump file into a
 * set of files containing segments of the data. The number of segments is
 * based on the S3_CHUNK_SIZES specified for each project, or "publisher" in
 * their parlance, in ../../data_sources/nonfungible_dot_com/index.ts.
 *
 * Usage: $ node partition_nonfungible_dot_com_dump.ts publisher
 * Example: $ node partition_nonfungible_dot_com_dump.ts cryptokitties
 *
 * Expects to find on disk a data file named
 * `sales_summary_${publisher}.json`, as emailed by Daniel of nonfungible.com.
 *
 * Writes to disk a set of files named `sales_summary_${publisher}${N}.json`.
 *
 * You will probably need to run `node` with `--max-old-space-size=1024`, or
 * an even larger value.
 */
import { readFileSync, writeFileSync } from 'fs';
import { splitEvery } from 'ramda';
import { logUtils } from '@0x/utils';
import {
    NonfungibleDotComHistoryResponse,
    NonfungibleDotComTradeResponse,
    S3_CHUNK_SIZES,
} from '../data_sources/nonfungible_dot_com';

(() => {
    const publisher = process.argv[2];
    const inputFilename = `sales_summary_${publisher}.json`;

    logUtils.log(`Reading input file ${inputFilename}`);
    const sourceJson: NonfungibleDotComHistoryResponse = JSON.parse(readFileSync(inputFilename).toString());

    const chunkSize = S3_CHUNK_SIZES[publisher];
    logUtils.log(`Splitting data into chunks of ${chunkSize} trades each`);
    const chunks: NonfungibleDotComTradeResponse[][] = splitEvery(chunkSize, sourceJson.data);

    logUtils.log(`Writing ${chunks.length} chunks to disk`);
    for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
        writeFileSync(`sales_summary_${publisher}${chunkIndex}.json`, JSON.stringify(chunks[chunkIndex]));
    }
})();
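
The script above imports S3_CHUNK_SIZES from ../data_sources/nonfungible_dot_com, which is not part of this excerpt. Purely as a hypothetical sketch of its shape: the publisher keys below are the ones touched by this commit, and the numbers are illustrative, not the real configuration.

    // Hypothetical sketch only; the real map lives in
    // ../data_sources/nonfungible_dot_com/index.ts with different values.
    export const S3_CHUNK_SIZES: { [publisher: string]: number } = {
        cryptokitties: 200000, // the ~1GB dump presumably needs the most aggressive partitioning
        chainbreakers: 50000,
        chibifighters: 50000,
        mlbcryptobaseball: 50000,
    };

Keying the chunk size by publisher lets each dump be partitioned according to its own size, so only the very large ones pay the cost of being spread across many S3 objects.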