A simple web interface to Tesseract OCR
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

30 lines
976 B

"use strict";
const fs = require("fs");
const util = require("util");
const path = require("path");
const { pipeline } = require("stream");
const pump = util.promisify(pipeline);
const crypto = require("crypto");
const mime = require("mime-types");
const exec = util.promisify(require("child_process").execFile);
// Directory that uploaded files are written into before being passed to
// tesseract. It is a relative path, so it resolves against the process's
// current working directory. The directory is not created by the code shown
// here — presumably it must already exist; verify against deployment setup.
const UPLOAD_PATH = "./uploads";
module.exports = async function (fastify, opts) {
fastify.register(require("fastify-multipart"));
fastify.post("/ocr", async function (req, reply) {
const data = await req.file();
const uid = crypto.randomBytes(16).toString("hex");
const filename = `${UPLOAD_PATH}/${uid}.${mime.extension(data.mimetype)}`;
const lang = "mal+eng";
await pump(data.file, fs.createWriteStream(filename));
const args = [filename, "stdout", "-l", lang];
const { stdout, stderr } = await exec("tesseract", args);
exec("rm", ["-f", filename]);
return {
text: stdout,
error: stderr,
};
});
};