Skip to content

Commit

Permalink
batch processing work
Browse files Browse the repository at this point in the history
  • Loading branch information
dougchestnut committed Jul 22, 2024
1 parent 7312273 commit 319da68
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 90 deletions.
14 changes: 8 additions & 6 deletions packages/markdown-assistant/a11y-images-alt.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { loadEnv, createBatchFile, readBatchOutput, generateDefaultOutputPath } from './utils.js';
import { loadEnv, createBatchFile, processMarkdown, readBatchOutput } from './utils.js';
import fs from 'fs/promises';
import { visitParents } from 'unist-util-visit-parents';
import { unified } from 'unified';
import markdown from 'remark-parse';
import stringify from 'remark-stringify';
import path from 'path';

const encodeImage = async (filePath) => {
const imageBuffer = await fs.readFile(filePath);
Expand Down Expand Up @@ -79,16 +80,17 @@ async function processAccessibility(options) {
throw new Error('API key is missing or .env is not loaded.');
}

if (options.overwrite && options.output) {
console.warn("Warning: Both --overwrite and --output options are specified. Overwrite will take precedence.");
}

const outputPath = options.overwrite ? options.file : (options.output || `${options.file}.out${path.extname(options.file)}`);

const markdownContent = await fs.readFile(options.file, 'utf8');
const instruction = `Process the images in the markdown file to add or overwrite alt text based on accessibility guidelines.` + (options.instruction || '');

const batchFilePath = `${options.file}.batch.jsonl`;

let outputPath = options.output;
if (!outputPath) {
outputPath = generateDefaultOutputPath(options.file);
}

if (options.batch) {
const batchOutput = await readBatchOutput(batchFilePath);
if (batchOutput) {
Expand Down
36 changes: 14 additions & 22 deletions packages/markdown-assistant/batch_processor.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,44 +58,36 @@ async function downloadBatchResults(fileId, batchFilePath) {
}

/**
 * Drives a single batch file through submission, status polling and result
 * download, using a sidecar JSON file to remember the batch ID between runs.
 *
 * The sidecar path is `${batchFilePath}.metadata.json`. When no `batchId` is
 * recorded there, the batch file is uploaded, a batch is created, and the new
 * ID is written back to the sidecar. The batch status is then fetched and
 * logged; on `completed` the results are downloaded and the batch/metadata
 * files are deleted.
 *
 * NOTE(review): this span looks like a commit diff whose +/- markers were
 * lost, so superseded and replacement statements sit side by side (two
 * metadata reads, two `batchId` guards, two `batchStatus` declarations, two
 * cleanup sequences). Confirm against the real file which lines are live
 * before relying on the comments below.
 *
 * @param {string} batchFilePath - Path of the batch file; also the prefix of
 *   the metadata sidecar path.
 * @returns {Promise<void>} Resolves once the status has been handled; no
 *   value is returned.
 */
async function processBatchFile(batchFilePath) {
// Read the original metadata file to check for batch ID
const metadataFilePath = `${batchFilePath}.metadata.json`;
let metadata = {};
// NOTE(review): this unconditional read throws when the sidecar file does not
// exist, whereas the existsSync-guarded read further down tolerates a missing
// file and falls back to `{}` — confirm which behavior is intended.
const metadataContent = JSON.parse(fs.readFileSync(metadataFilePath, 'utf8'));

// Check if the metadata file exists
if (fs.existsSync(metadataFilePath)) {
// Inside this block `metadataContent` is the raw JSON text (it shadows the
// parsed object declared above); the parsed result lands in `metadata`.
const metadataContent = fs.readFileSync(metadataFilePath, 'utf8');
metadata = JSON.parse(metadataContent);
}

// If the batch is not submitted yet, submit it
// NOTE(review): two guards on two different variables (`metadata` and
// `metadataContent`) — presumably only one of them belongs to the current code.
if (!metadata.batchId) {
if (!metadataContent.batchId) {
const uploadedFile = await uploadBatchFile(batchFilePath);
console.log('Batch file uploaded:', uploadedFile);

const batch = await createBatch(uploadedFile);
console.log('Batch created:', batch);

// Save the batch ID to the metadata file
metadata.batchId = batch.id;
fs.writeFileSync(metadataFilePath, JSON.stringify(metadata, null, 2));
// Update the batch file with the batch ID
// (what is actually written is the metadata sidecar, not the batch file itself)
metadataContent.batchId = batch.id;
fs.writeFileSync(metadataFilePath, JSON.stringify(metadataContent, null, 2));
}

// Check the batch status
// NOTE(review): `batchStatus` is declared twice in the same scope, which is a
// SyntaxError as written — one of these two lines must be stale.
const batchStatus = await checkBatchStatus(metadata.batchId);
const batchStatus = await checkBatchStatus(metadataContent.batchId);
console.log('Current status:', batchStatus.status);

if (batchStatus.status === 'completed') {
// This first download/cleanup sequence runs without checking that
// `output_file_id` is set; the guarded sequence below does check it.
await downloadBatchResults(batchStatus.output_file_id, batchFilePath);
// Cleanup the batch file and metadata file
fs.unlinkSync(batchFilePath);
fs.unlinkSync(metadataFilePath);
console.log(`Batch file ${batchFilePath} and metadata file ${metadataFilePath} have been removed.`);
// Only download and clean up when the completed batch reports an output file;
// otherwise keep both files on disk and report the problem.
if (batchStatus.output_file_id) {
await downloadBatchResults(batchStatus.output_file_id, batchFilePath);
fs.unlinkSync(batchFilePath);
fs.unlinkSync(metadataFilePath);
} else {
console.error('Batch completed but no output file was generated.');
}
} else if (['failed', 'cancelled', 'expired'].includes(batchStatus.status)) {
// Terminal non-success states: log the full status object for diagnosis.
console.error('Batch processing failed or cancelled:', batchStatus);
// Cleanup the batch file and metadata file
fs.unlinkSync(batchFilePath);
fs.unlinkSync(metadataFilePath);
console.log(`Batch file ${batchFilePath} and metadata file ${metadataFilePath} have been removed.`);
}
}

Expand Down
77 changes: 28 additions & 49 deletions packages/markdown-assistant/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@ import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { loadEnv, createBatchFile, processMarkdown, readBatchOutput } from './utils.js';
import fs from 'fs/promises';
import path from 'path';

async function formatMarkdown(options) {
if (!await loadEnv() || !process.env.OPENAI_API_KEY) {
throw new Error('API key is missing or .env is not loaded.');
}

if (options.overwrite && options.output) {
console.warn("Warning: Both --overwrite and --output options are specified. Overwrite will take precedence.");
}

const outputPath = options.overwrite ? options.file : (options.output || `${options.file}.out${path.extname(options.file)}`);

let instruction = options.instruction || 'Please format this markdown correctly for the web site.';

instruction += `
Expand All @@ -21,67 +26,41 @@ async function formatMarkdown(options) {
- Don't wrap the markdown in ${"```"}
`;

const processFile = async (filePath) => {
const batchFilePath = `${filePath}.batch.jsonl`;

let outputPath = options.output;
if (!outputPath) {
const parsedPath = path.parse(filePath);
outputPath = path.join(parsedPath.dir, `${parsedPath.name}.out${parsedPath.ext}`);
}
const batchFilePath = `${options.file}.batch.jsonl`;

if (options.batch) {
const batchOutput = await readBatchOutput(batchFilePath);
if (batchOutput) {
console.log(batchOutput);
if (outputPath) {
await fs.writeFile(outputPath, batchOutput);
console.log(`Output written to ${outputPath}`);
}
} else {
await createBatchFile({
filePath: filePath,
instruction: instruction,
output: batchFilePath,
originalOutputPath: outputPath // Store the original output path in the batch file
});
}
if (options.batch) {
const batchOutput = await readBatchOutput(batchFilePath);
if (batchOutput) {
console.log(batchOutput);
await fs.writeFile(outputPath, batchOutput);
console.log(`Output written to ${outputPath}`);
} else {
const formattedContent = await processMarkdown({
apiKey: process.env.OPENAI_API_KEY,
filePath: filePath,
instruction: instruction
await createBatchFile({
filePath: options.file,
instruction: instruction,
output: batchFilePath,
originalOutputPath: outputPath // Store the original output path in the batch file
});

if (outputPath) {
await fs.writeFile(outputPath, formattedContent);
console.log(`Output written to ${outputPath}`);
} else {
console.log(formattedContent);
}
}
};

const stats = await fs.stat(options.file);
if (stats.isDirectory()) {
const files = await fs.readdir(options.file);
for (const file of files) {
const filePath = path.join(options.file, file);
if (path.extname(filePath) === '.md') {
await processFile(filePath);
}
}
} else {
await processFile(options.file);
const formattedContent = await processMarkdown({
apiKey: process.env.OPENAI_API_KEY,
filePath: options.file,
instruction: instruction
});

await fs.writeFile(outputPath, formattedContent);
console.log(`Output written to ${outputPath}`);
}
}

if (import.meta.url === `file://${process.argv[1]}`) {
const argv = yargs(hideBin(process.argv))
.option('file', { alias: 'f', describe: 'Path to the markdown file or directory', type: 'string', demandOption: true })
.option('file', { alias: 'f', describe: 'Path to the markdown file', type: 'string', demandOption: true })
.option('instruction', { alias: 'i', describe: 'Instruction to process the markdown', type: 'string', default: 'Please format this markdown correctly.' })
.option('output', { alias: 'o', describe: 'Output file path', type: 'string' })
.option('batch', { alias: 'b', describe: 'Create a batch file and use output if present', type: 'boolean', default: false })
.option('overwrite', { alias: 'ow', describe: 'Overwrite the input file with the output', type: 'boolean', default: false })
.parse();

formatMarkdown(argv).catch(e => console.error(e));
Expand Down
17 changes: 10 additions & 7 deletions packages/markdown-assistant/metadata.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import { loadEnv, createBatchFile, processMarkdown, readBatchOutput, generateDefaultOutputPath } from './utils.js';
import { loadEnv, createBatchFile, processMarkdown, readBatchOutput } from './utils.js';
import fs from 'fs/promises';
import path from 'path';

async function processMetadata(options) {
if (!await loadEnv() || !process.env.OPENAI_API_KEY) {
throw new Error('API key is missing or .env is not loaded.');
}

if (options.overwrite && options.output) {
console.warn("Warning: Both --overwrite and --output options are specified. Overwrite will take precedence.");
}

const outputPath = options.overwrite ? options.file : (options.output || `${options.file}.out${path.extname(options.file)}`);

const instruction = `
The metadata should take the following form:
***
Expand Down Expand Up @@ -37,11 +44,6 @@ The following rules must be followed:

const batchFilePath = `${options.file}.batch.jsonl`;

let outputPath = options.output;
if (!outputPath) {
outputPath = generateDefaultOutputPath(options.file);
}

if (options.batch) {
const batchOutput = await readBatchOutput(batchFilePath);
if (batchOutput) {
Expand All @@ -65,7 +67,7 @@ The following rules must be followed:
});
}
} else {
const content = await processMarkdown({
let content = await processMarkdown({
apiKey: process.env.OPENAI_API_KEY,
filePath: options.file,
instruction: instruction
Expand All @@ -88,6 +90,7 @@ if (import.meta.url === `file://${process.argv[1]}`) {
.option('output', { alias: 'o', describe: 'Output file path', type: 'string' })
.option('embed', { alias: 'e', type: 'boolean', describe: 'Embed JSON-LD metadata into the original markdown', default: false })
.option('batch', { alias: 'b', describe: 'Create a batch file and use output if present', type: 'boolean', default: false })
.option('overwrite', { alias: 'ow', describe: 'Overwrite the input file with the output', type: 'boolean', default: false })
.parse();

processMetadata(argv).catch(e => console.error(e));
Expand Down

This file was deleted.

This file was deleted.

0 comments on commit 319da68

Please sign in to comment.