// SimpleExtractor.java

package jpod.leo.imgExtractor;

import org.faceless.pdf2.*;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.logging.ConsoleHandler;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;

/**
 * Command-line tool that pulls every image out of a PDF and writes each one
 * to the {@code imgs} directory as JPEG, PNG and GIF.
 *
 * <p>The single expected argument is either a URL or a local file path
 * pointing at the PDF to process.
 *
 * Created by leojpod on 2016-09-03.
 */
public class SimpleExtractor {

    /** Directory, relative to the working directory, that receives the extracted images. */
    private static final String BASE_PATH = "imgs";

    /**
     * Entry point: opens the PDF named by {@code args[0]}, prepares the output
     * directory and exports every image found in the document.
     *
     * @param args command-line arguments; {@code args[0]} is a URL or file path to a PDF
     * @throws IOException if reading the PDF stream or writing an image file fails
     */
    public static void main(String[] args) throws IOException {
        // Guard against a missing argument instead of failing with ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0) {
            System.err.println("Usage: SimpleExtractor <pdf-url-or-file-path>");
            return;
        }

        System.out.println("let's start processing the parameters");
        System.out.println("starting with " + args[0]);

        InputStream source;
        try {
            source = openSource(args[0]);
        } catch (FileNotFoundException e) {
            System.out.println("Could not locate your PDF. Make sure the file locator you gave is correct");
            return;
        }

        // Keep the input open for the whole extraction and close it afterwards,
        // whatever the outcome.
        try (InputStream stream = source) {
            if (!extractImages(stream, new File(BASE_PATH))) {
                return;
            }
        }

        System.out.println("we are done!");
    }

    /**
     * Opens the PDF source, first as a URL and, if that fails, as a local file.
     *
     * @param locator URL or file path given on the command line
     * @return an open stream on the PDF; the caller is responsible for closing it
     * @throws FileNotFoundException if the locator is neither a reachable URL nor a readable file
     */
    private static InputStream openSource(String locator) throws FileNotFoundException {
        try {
            return new URL(locator).openStream();
        } catch (IOException ignored) {
            // Not a usable URL (malformed or unreachable): fall back to the file system.
            System.out.println("we did not receive a URL let's see if it matches a file");
        }
        return new FileInputStream(locator);
    }

    /**
     * Reads the PDF from {@code stream} and exports all of its images into {@code directory}.
     *
     * @param stream    open stream on the PDF document
     * @param directory destination directory for the image files
     * @return {@code true} if the extraction ran to completion, {@code false} if it was
     *         aborted because the directory could not be created or the PDF could not be read
     * @throws IOException if an image file cannot be written
     */
    private static boolean extractImages(InputStream stream, File directory) throws IOException {
        if (!prepareOutputDirectory(directory)) {
            return false;
        }

        PDFReader reader;
        PDF pdf;
        PDFParser parser;
        try {
            reader = new PDFReader(stream);
            pdf = new PDF(reader);
            parser = new PDFParser(pdf);
        } catch (IOException e) {
            System.err.println("Cannot read the PDF. Make sure the file locator you gave match a PDF");
            return false;
        }

        List<PDFPage> pages = pdf.getPages();
        List<PageExtractor.Image> allImgs = new ArrayList<>();
        System.out.println("there are " + pages.size() + " pages to process");
        for (PDFPage page : pages) {
            PageExtractor extract = parser.getPageExtractor(page);
            Collection<PageExtractor.Image> imgs = extract.getImages();
            allImgs.addAll(imgs);
        }
        System.out.println("we've got " + allImgs.size() + " images");

        int picCount = 0;
        for (PageExtractor.Image img : allImgs) {
            RenderedImage renderedImage = img.getImage();
            picCount++;
            String baseName = "img" + picCount;

            // JPEG gets an opaque BGR copy of the image; PNG and GIF are written
            // from the original rendered image.
            writeImage(toOpaqueBgr(renderedImage), "jpg", new File(directory, baseName + ".jpg"));
            writeImage(renderedImage, "png", new File(directory, baseName + ".png"));
            writeImage(renderedImage, "gif", new File(directory, baseName + ".gif"));
        }
        System.out.println("extracted " + picCount + " pictures to " + directory.getCanonicalPath());
        return true;
    }

    /**
     * Creates the output directory, or empties it when it already exists.
     *
     * @param directory destination directory for the image files
     * @return {@code false} if the directory did not exist and could not be created
     * @throws IOException if the canonical path of the directory cannot be resolved
     */
    private static boolean prepareOutputDirectory(File directory) throws IOException {
        System.out.println("extracting pics to " + directory.getCanonicalPath());
        if (!directory.exists()) {
            System.out.println("creating extraction directory");
            if (!directory.mkdirs()) {
                System.err.println("Could not create extraction directory " + directory.getPath());
                return false;
            }
            return true;
        }

        System.out.println("extraction directory already exists, emptying it");
        File[] files = directory.listFiles();
        if (files != null) {
            for (File f : files) {
                // Best effort: a leftover entry is reported but does not stop the extraction.
                if (!f.delete()) {
                    System.err.println("Could not delete " + f.getPath());
                }
            }
        }
        return true;
    }

    /**
     * Draws {@code source} onto a new {@link BufferedImage#TYPE_3BYTE_BGR} image of the same size.
     *
     * @param source image to copy
     * @return a BGR copy of {@code source} without an alpha channel
     */
    private static BufferedImage toOpaqueBgr(RenderedImage source) {
        BufferedImage copy = new BufferedImage(source.getWidth(), source.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
        Graphics2D g2d = copy.createGraphics();
        try {
            g2d.drawRenderedImage(source, null);
        } finally {
            g2d.dispose();
        }
        return copy;
    }

    /**
     * Encodes {@code image} in the given format and writes it to {@code target}.
     *
     * <p>{@code ImageIO.write} does not close the stream it is handed, so the stream
     * is closed here. When no writer accepts the image, a warning is printed and the
     * empty file is removed.
     *
     * @param image  image to encode
     * @param format informal ImageIO format name, e.g. {@code "png"}
     * @param target file to create or overwrite
     * @throws IOException if the file cannot be opened or an error occurs while writing
     */
    private static void writeImage(RenderedImage image, String format, File target) throws IOException {
        boolean written;
        try (OutputStream out = new FileOutputStream(target)) {
            written = ImageIO.write(image, format, out);
        }
        if (!written) {
            System.err.println("No " + format + " writer could encode " + target.getName() + ", skipping it");
            // Best effort: drop the empty file left behind by the failed export.
            if (!target.delete()) {
                System.err.println("Could not delete " + target.getPath());
            }
        }
    }
}