JavaRanch Home    
 
This page:         last edited 11 January 2010         What's Changed?         Edit

Read Power Point   

This code example shows how to use the Apache POI library to read a PowerPoint presentation file, and how to extract text, images and notes from it.

This code works with binary PPT files (.ppt), not the XML format (.pptx). POI's APIs for both are pretty similar, though. One would use JavaDoc:org.apache.poi.xslf.XSLFSlideShow instead of HSLFSlideShow, and then use the classes in org.apache.poi.xslf.* instead of the ones in org.apache.poi.hslf.*


import java.io.*;

import org.apache.poi.hslf.*;
import org.apache.poi.hslf.model.*;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.TextHeaderAtom;
import org.apache.poi.hslf.usermodel.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class PPTTest {

    public static void main (String[] args) throws Exception {
        if (args.length == 0) {
            System.out.println("usage is: java PPTTest <PPT file>");
            System.exit(0);
        }

        InputStream fis = new FileInputStream(args[0]);;
        POIFSFileSystem fs = new POIFSFileSystem(fis);
        HSLFSlideShow show = new HSLFSlideShow(fs);

        // slide
        SlideShow ss = new SlideShow(show);
        Slide[] slides = ss.getSlides();
        for (int i=0; i<slides.length; i++) {
            System.out.println("slide "+(i+1)+": "+slides[i].getTitle());

            // text runs
            TextRun[] runs = slides[i].getTextRuns();
            for (int j=0; j<runs.length; j++) {
                TextRun run = runs[j];
                if (run.getRunType() == TextHeaderAtom.TITLE_TYPE) {
                    System.out.println("slide title "+(j+1)+": "+run.getText());
                } else {
                    System.out.println("slide text run "+(j+1)+": "+run.getRunType()+" : "+run.getText());
                }
            }

            // shapes
            Shape[] shapes = slides[i].getShapes();
            for (int j=0; j<shapes.length; j++) {
                System.out.println("shape "+(j+1)+": "+shapes[j].getClass());
                if (shapes[j] instanceof Picture) {
                    PictureData pd = ((Picture) shapes[j]).getPictureData();
                    System.out.print("picture "+(i+1)+": ");
                    String ext = "";
                    switch (pd.getType()) {
                        case Picture.DIB:
                            System.out.println("DIB"); ext = ".dib"break;
                        case Picture.EMF:
                            System.out.println("EMF"); ext = ".emf"break;
                        case Picture.JPEG:
                            System.out.println("JPEG"); ext = ".jpg"break;
                        case Picture.PICT:
                            System.out.println("PICT"); ext = ".pict"break;
                        case Picture.PNG:
                            System.out.println("PNG"); ext = ".png"break;
                        case Picture.WMF:
                            System.out.println("WMF"); ext = ".wmf"break;
                        default:
                            System.out.println("????"); break;
                    }
                    /*
                    FileOutputStream fos = new FileOutputStream("picture"+(i+1)+ext);
                    fos.write(pd.getData());
                    fos.close();
                    */
                }
            }

            // notes
            Notes notes = slides[i].getNotesSheet();
            if (notes != null) {
                runs = notes.getTextRuns();
                for (int j=0; j<runs.length; j++) {
                    System.out.println("notes text run "+(j+1)+": "+runs[j].getText());
                }
            }
        }
    }
}


CategoryCodeSamples CodeBarn

JavaRanchContact us — Copyright © 1998-2014 Paul Wheaton