// Rasterize.java
import java.io.*; import java.awt.color.*; import java.awt.image.*; import java.awt.geom.*; import java.util.*; import org.faceless.pdf2.*; /** * Testbed for benchmarking conversion from PDF to rasterized PDF. * * Usage: java Rasterize <file.pdf> * * Will test four options: * 1. Render to hi-res B&W, then re-render to low-res color/gray if required * 2. Render to low-res Color, then convert to gray or re-render as hi-res B&W if required * 3. Render to hi-res Color then downsample as required * 4. No change - render at 200dpi color. * * and save them as "out1.pdf", "out2.pdf", "out3.pdf" and "out4.pdf". While rendering * one letter per page will be printed to System.out: C, G or B for Color, Grayscale or B&W. */ public class Rasterize { public static void main(String[] args) throws IOException { PDF pdf = new PDF(new PDFReader(new File(args[0]))); PDF rasterpdf; if (pdf.getNumberOfPages() > 50) { // Limit testing to 50 pages. pdf.getPages().subList(50, pdf.getPages().size()).clear(); } int colordpi = 200; int bwdpi = 300; long t1 = System.currentTimeMillis(); rasterpdf = rasterize(pdf, bwdpi, colordpi, ByRerenderToColor); rasterpdf.render(new FileOutputStream("out1.pdf")); long t2 = System.currentTimeMillis(); rasterpdf = rasterize(pdf, bwdpi, colordpi, ByRerenderTo1Bit); rasterpdf.render(new FileOutputStream("out2.pdf")); long t3 = System.currentTimeMillis(); rasterpdf = rasterize(pdf, bwdpi, colordpi, ByDownsampling); rasterpdf.render(new FileOutputStream("out3.pdf")); long t4 = System.currentTimeMillis(); rasterpdf = rasterize(pdf, bwdpi, colordpi, NoChange); rasterpdf.render(new FileOutputStream("out4.pdf")); long t5 = System.currentTimeMillis(); System.out.println("Rasterize took "+(t2-t1)+" "+(t3-t2)+" "+(t4-t3)+" "+(t5-t4)); } public static PDF rasterize(PDF pdf, int bwdpi, int colordpi, Rasterizer rasterizer) { PDF newpdf = new PDF(); PDFParser parser = new PDFParser(pdf); List pages = pdf.getPages(); for (int i=0;i<pages.size();i++) { OutputProfile profile 
= new OutputProfile(OutputProfile.Default); parser.setOutputProfile(profile); PDFPage page = pdf.getPage(i); PagePainter painter = parser.getPagePainter(page); BufferedImage bufimage = rasterizer.toBitmap(painter, profile, colordpi, bwdpi); PDFImage pdfimage = null; try { pdfimage = new PDFImage(bufimage); } catch (InterruptedException e) { // Can't happen with BufferedImage } PDFPage newpage = newpdf.newPage((int)page.getWidth(), (int)page.getHeight()); newpage.drawImage(pdfimage, 0, 0, page.getWidth(), page.getHeight()); } System.out.println(); return newpdf; } /** * Given a 24-bit RGB image, optionally resize it or reduce the number of colors to 8 or 1 * @param image the image to be resize - must be 24-bit RGB with byte-based Raster * @param indpi the DPI the image is currently in * @param outdpi the DPI to convert the image to * @param bpp the number of bits per pixel - 24 for RGB, 8 for Grayscale or 1 for B&W * @return the modified image */ private static BufferedImage fixImage(BufferedImage image, int indpi, int outdpi, int bpp) { // Assuming a DataBufferByte for both input and output image - this will always be // the case in this eaxmple, but some ColorModels (eg ColorModel.getRGBdefault) use // a DataBufferInt. So this is not a general purpose routine, but as used here this // assumption makes the code a lot simpler and more efficient. ColorModel cm = image.getColorModel(); WritableRaster raster = image.getRaster(); int w = raster.getWidth(); int h = raster.getHeight(); if (indpi != outdpi) { double scale = (double)outdpi / indpi; w *= scale; h *= scale; WritableRaster outraster = cm.createCompatibleWritableRaster(w, h); AffineTransform tran = AffineTransform.getScaleInstance(scale, scale); // This is typically hardware accelerated, so can't be beat for performance. 
AffineTransformOp scaler = new AffineTransformOp(tran, AffineTransformOp.TYPE_BILINEAR); scaler.filter(raster, outraster); raster = outraster; } if (bpp == 8) { // Remove color information but keep 1 pixel per byte - easy. cm = PDFParser.GRAYSCALE; WritableRaster outraster = cm.createCompatibleWritableRaster(w, h); byte[] in = ((DataBufferByte)raster.getDataBuffer()).getData(); byte[] out = ((DataBufferByte)outraster.getDataBuffer()).getData(); int i = 0, j = 0; while (i < in.length) { int rgb = ((in[i++]&0xFF) << 16) | ((in[i++]&0xFF) << 8) | (in[i++]&0xFF); // This is fast way of converting RGB to Grayscale. It's an integer // based version of the standard PAL/NTSC grayscale formula: // gray = 0.3red + 0.59green + 0.11blue int gray = rgb==0xFFFFFF ? 255 : ((((rgb&0xFF0000)/850) + (((rgb<<8)&0xFF0000)/432) + ((rgb<<16)&0xFF0000)/2318)) >> 8; out[j++] = (byte)gray; } raster = outraster; } else if (bpp == 1) { // Remove color information and reduce to 1 bit per pixel, or 7 pixels per byte. // Still fairly simple, we just need to byte align each row. cm = PDFParser.BLACKANDWHITE; WritableRaster outraster = cm.createCompatibleWritableRaster(w, h); byte[] in = ((DataBufferByte)raster.getDataBuffer()).getData(); byte[] out = ((DataBufferByte)outraster.getDataBuffer()).getData(); int i = 0, j = 0; for (int y=0;y<h;y++) { int x = 0, n = 0; for (x=0;x<w;x++) { int rgb = ((in[i++]&0xFF) << 16) | ((in[i++]&0xFF) << 8) | (in[i++]&0xFF); int gray = rgb==0xFFFFFF ? 
255 : ((((rgb&0xFF0000)/850) + (((rgb<<8)&0xFF0000)/432) + ((rgb<<16)&0xFF0000)/2318)) >> 8; n <<= 1; if (gray > 128) { // 128 is normal threshold, but you can adjust n |= 1; } if ((x&7) == 7) { // Finished 8 pixel block - push it to output out[j++] = (byte)n; n = 0; } } if ((x&7) != 7) { // Image isn't multiple of 8 wide - shift and push it to output n <<= 8 - (x&7); out[j++] = (byte)n; } } raster = outraster; } else if (bpp != 24) { throw new IllegalArgumentException("bpp must be 24, 8 or 1"); } if (raster != image.getRaster()) { image = new BufferedImage(cm, raster, false, null); } return image; } //--------------------------------------------------------------------------- interface Rasterizer { BufferedImage toBitmap(PagePainter painter, OutputProfile profile, int colordpi, int bwdpi); } static final Rasterizer ByDownsampling = new Rasterizer() { public BufferedImage toBitmap(PagePainter painter, OutputProfile profile, int colordpi, int bwdpi) { BufferedImage image = painter.getImage(bwdpi, PDFParser.RGB); if (profile.isSet(OutputProfile.Feature.ColorImage) || profile.isSet(OutputProfile.Feature.ColorContent)) { System.out.print("C"); image = fixImage(image, bwdpi, colordpi, 24); } else if (profile.isSet(OutputProfile.Feature.GrayscaleImage) || profile.isSet(OutputProfile.Feature.GrayscaleContent)) { System.out.print("G"); image = fixImage(image, bwdpi, colordpi, 8); } else { System.out.print("B"); image = fixImage(image, bwdpi, bwdpi, 1); } return image; } }; static final Rasterizer ByRerenderTo1Bit = new Rasterizer() { public BufferedImage toBitmap(PagePainter painter, OutputProfile profile, int colordpi, int bwdpi) { BufferedImage image = painter.getImage(colordpi, PDFParser.RGB); if (profile.isSet(OutputProfile.Feature.ColorImage) || profile.isSet(OutputProfile.Feature.ColorContent)) { System.out.print("C"); } else if (profile.isSet(OutputProfile.Feature.GrayscaleImage) || profile.isSet(OutputProfile.Feature.GrayscaleContent)) { System.out.print("G"); 
image = fixImage(image, colordpi, colordpi, 8); } else { System.out.print("B"); image = painter.getImage(bwdpi, PDFParser.BLACKANDWHITE); } return image; } }; static final Rasterizer ByRerenderToColor = new Rasterizer() { public BufferedImage toBitmap(PagePainter painter, OutputProfile profile, int colordpi, int bwdpi) { BufferedImage image = painter.getImage(bwdpi, PDFParser.BLACKANDWHITE); if (profile.isSet(OutputProfile.Feature.ColorImage) || profile.isSet(OutputProfile.Feature.ColorContent)) { System.out.print("C"); image = painter.getImage(colordpi, PDFParser.RGB); } else if (profile.isSet(OutputProfile.Feature.GrayscaleImage) || profile.isSet(OutputProfile.Feature.GrayscaleContent)) { System.out.print("G"); image = painter.getImage(colordpi, PDFParser.GRAYSCALE); } else { System.out.print("B"); } return image; } }; static final Rasterizer NoChange = new Rasterizer() { public BufferedImage toBitmap(PagePainter painter, OutputProfile profile, int colordpi, int bwdpi) { return painter.getImage(colordpi, PDFParser.RGB); } }; }