/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.microsoft;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.AbstractPOIFSExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

public class WordExtractor
extends AbstractPOIFSExtractor {
    // U+2011. Not referenced by name in this file as shown; handleCharacterRun substitutes
    // the same character (written as a literal) for the 0x1E control character.
    private static final char UNICODECHAR_NONBREAKING_HYPHEN = '\u2011';
    // U+200B. Not referenced by name in this file as shown; handleCharacterRun substitutes
    // the same character (written as a literal) for the 0x1F control character.
    private static final char UNICODECHAR_ZERO_WIDTH_SPACE = '\u200b';
    // True while an <s> element opened by handleCharacterRun has not been closed yet.
    private boolean curStrikeThrough;
    // True while a <b> element opened by handleCharacterRun has not been closed yet.
    private boolean curBold;
    // True while an <i> element opened by handleCharacterRun has not been closed yet.
    private boolean curItalic;
    // Style names with a fixed tag/class mapping; filled by the static initializer of this class
    // and consulted first by buildParagraphTagAndStyle.
    private static final Map<String, TagAndStyle> fixedParagraphStyles = new HashMap<String, TagAndStyle>();
    // Plain <p> with no class attribute.
    private static final TagAndStyle defaultParagraphStyle = new TagAndStyle("p", null);

    /**
     * Creates an extractor, handing the parse context to the superclass constructor.
     *
     * @param context parse context forwarded unchanged to {@code AbstractPOIFSExtractor}
     */
    public WordExtractor(ParseContext context) {
        super(context);
    }

    /**
     * Parses a Word document held in the given POIFS file system by delegating to
     * {@link #parse(DirectoryNode, XHTMLContentHandler)} with the file system's root directory.
     *
     * @param filesystem file system whose root directory holds the document
     * @param xhtml      handler that receives the generated XHTML events
     */
    protected void parse(NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        DirectoryNode rootDirectory = filesystem.getRoot();
        parse(rootDirectory, xhtml);
    }

    /**
     * Parses the Word document stored under {@code root} and writes it to {@code xhtml}.
     *
     * <p>Output order: header text, body paragraphs (tables included), main text boxes,
     * footnotes, comments, endnotes, footer text, pictures no paragraph claimed, and finally
     * the embedded documents found in the "ObjectPool" directory. If POI reports the old
     * Word format, the whole job is handed to {@link #parseWord6(DirectoryNode, XHTMLContentHandler)}.
     *
     * @param root  directory node containing the document streams
     * @param xhtml handler that receives the generated XHTML events
     */
    protected void parse(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        HWPFDocument doc;
        try {
            doc = new HWPFDocument(root);
        }
        catch (OldWordFileFormatException oldFormat) {
            // Pre-Word-97 file: only the plain-text Word 6 path can read it.
            parseWord6(root, xhtml);
            return;
        }

        org.apache.poi.hwpf.extractor.WordExtractor poiExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(doc);
        addTextIfAny(xhtml, "header", poiExtractor.getHeaderText());

        PicturesTable pictureTable = doc.getPicturesTable();
        PicturesSource pictures = new PicturesSource(doc);

        // Body: handleParagraph reports how many extra paragraphs it consumed (a whole table).
        Range body = doc.getRange();
        int index = 0;
        while (index < body.numParagraphs()) {
            Paragraph current = body.getParagraph(index);
            index += handleParagraph(current, 0, body, doc, pictures, pictureTable, xhtml) + 1;
        }

        String[] textboxes = poiExtractor.getMainTextboxText();
        for (int k = 0; k < textboxes.length; ++k) {
            xhtml.element("p", textboxes[k]);
        }
        String[] footnotes = poiExtractor.getFootnoteText();
        for (int k = 0; k < footnotes.length; ++k) {
            xhtml.element("p", footnotes[k]);
        }
        String[] comments = poiExtractor.getCommentsText();
        for (int k = 0; k < comments.length; ++k) {
            xhtml.element("p", comments[k]);
        }
        String[] endnotes = poiExtractor.getEndnoteText();
        for (int k = 0; k < endnotes.length; ++k) {
            xhtml.element("p", endnotes[k]);
        }

        addTextIfAny(xhtml, "footer", poiExtractor.getFooterText());

        // Pictures that no character run pointed at are appended after the text.
        for (Picture leftover = pictures.nextUnclaimed(); leftover != null; leftover = pictures.nextUnclaimed()) {
            handlePictureCharacterRun(null, leftover, pictures, xhtml);
        }

        try {
            DirectoryEntry objectPool = (DirectoryEntry)root.getEntry("ObjectPool");
            for (Entry child : objectPool) {
                if (child.getName().startsWith("_") && child instanceof DirectoryEntry) {
                    handleEmbeddedOfficeDoc((DirectoryEntry)child, xhtml);
                }
            }
        }
        catch (FileNotFoundException ignored) {
            // No "ObjectPool" entry: the document has no embedded objects to extract.
        }
    }

    /**
     * Writes one paragraph to {@code xhtml}, or a complete table when {@code p} is the first
     * paragraph of a table and we are not already inside one.
     *
     * <p>For a table, every cell paragraph is written through a recursive call that passes the
     * table level of {@code p} as {@code parentTableLevel}; because the table branch is only
     * taken when {@code parentTableLevel == 0}, paragraphs of nested tables are written as
     * ordinary paragraphs inside the enclosing cell.
     *
     * <p>Any {@code <s>}, {@code <i>} or {@code <b>} element still open at the end of the
     * paragraph is closed (in that order) before the paragraph element itself is closed.
     *
     * @param p                paragraph to write
     * @param parentTableLevel table level of the caller; 0 when called for body paragraphs
     * @param r                range {@code p} belongs to; used to look up the table containing it
     * @param document         document supplying the style sheet
     * @param pictures         picture bookkeeping shared across the whole parse
     * @param pictureTable     POI picture table used to test whether a run carries a picture
     * @param xhtml            handler that receives the generated XHTML events
     * @return the number of additional paragraphs of {@code r} that were consumed beyond
     *         {@code p}: {@code numParagraphs() - 1} of the table, or 0 for a plain paragraph
     */
    private int handleParagraph(Paragraph p, int parentTableLevel, Range r, HWPFDocument document, PicturesSource pictures, PicturesTable pictureTable, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
        if (p.isInTable() && p.getTableLevel() > parentTableLevel && parentTableLevel == 0) {
            Table t = r.getTable(p);
            xhtml.startElement("table");
            xhtml.startElement("tbody");
            for (int rn = 0; rn < t.numRows(); ++rn) {
                TableRow row = t.getRow(rn);
                xhtml.startElement("tr");
                for (int cn = 0; cn < row.numCells(); ++cn) {
                    TableCell cell = row.getCell(cn);
                    xhtml.startElement("td");
                    for (int pn = 0; pn < cell.numParagraphs(); ++pn) {
                        Paragraph cellP = cell.getParagraph(pn);
                        this.handleParagraph(cellP, p.getTableLevel(), (Range)cell, document, pictures, pictureTable, xhtml);
                    }
                    xhtml.endElement("td");
                }
                xhtml.endElement("tr");
            }
            xhtml.endElement("tbody");
            xhtml.endElement("table");
            // The caller already counts p itself, so report only the extra paragraphs.
            return t.numParagraphs() - 1;
        }

        TagAndStyle tas = WordExtractor.resolveParagraphTagAndStyle(p, document, parentTableLevel > 0);
        if (tas.getStyleClass() != null) {
            xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
        } else {
            xhtml.startElement(tas.getTag());
        }

        for (int j = 0; j < p.numCharacterRuns(); ++j) {
            CharacterRun cr = p.getCharacterRun(j);
            String runText = cr.text();
            if (runText.equals("\u0013")) {
                // Field begin marker: the helper consumes the whole field and says how far to skip.
                j += this.handleSpecialCharacterRuns(p, j, tas.isHeading(), pictures, xhtml);
                continue;
            }
            if (runText.startsWith("\b")) {
                // One not-yet-claimed picture is taken for every character of this run.
                for (int pn = 0; pn < runText.length(); ++pn) {
                    Picture picture = pictures.nextUnclaimed();
                    this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                }
                continue;
            }
            if (pictureTable.hasPicture(cr)) {
                Picture picture = pictures.getFor(cr);
                this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                continue;
            }
            this.handleCharacterRun(cr, tas.isHeading(), xhtml);
        }

        // Inline styling never spans paragraphs: close whatever is still open, innermost first.
        if (this.curStrikeThrough) {
            xhtml.endElement("s");
            this.curStrikeThrough = false;
        }
        if (this.curItalic) {
            xhtml.endElement("i");
            this.curItalic = false;
        }
        if (this.curBold) {
            xhtml.endElement("b");
            this.curBold = false;
        }
        xhtml.endElement(tas.getTag());
        return 0;
    }

    /**
     * Picks the tag and class for a paragraph from its style, falling back to a plain
     * {@code <p>} when the style index is outside the style sheet, the style description is
     * missing, or the style has no usable (non-null, non-empty) name.
     */
    private static TagAndStyle resolveParagraphTagAndStyle(Paragraph p, HWPFDocument document, boolean inTable) {
        if (document.getStyleSheet().numStyles() > p.getStyleIndex()) {
            StyleDescription style = document.getStyleSheet().getStyleDescription((int)p.getStyleIndex());
            if (style != null) {
                String styleName = style.getName();
                // A null or empty name cannot be mapped and would make the mapping code throw.
                if (styleName != null && styleName.length() > 0) {
                    return WordExtractor.buildParagraphTagAndStyle(styleName, inTable);
                }
            }
        }
        return defaultParagraphStyle;
    }

    /**
     * Writes the text of a single character run, opening and closing {@code <b>},
     * {@code <i>} and {@code <s>} elements so that they match the run's formatting.
     *
     * <p>Elements are kept nested as b &gt; i &gt; s: changing an outer style first closes the
     * inner ones that are open, and the later checks reopen them if the run still needs them.
     * Runs marked as deleted and runs consisting only of a carriage return write nothing.
     *
     * @param cr          run to write
     * @param skipStyling when true, no styling elements are opened or closed for this run
     * @param xhtml       handler that receives the generated XHTML events
     */
    private void handleCharacterRun(CharacterRun cr, boolean skipStyling, XHTMLContentHandler xhtml) throws SAXException {
        if (!isRendered(cr)) {
            return;
        }
        if (cr.text().equals("\r")) {
            return;
        }

        if (!skipStyling) {
            boolean wantBold = cr.isBold();
            if (wantBold != curBold) {
                // <b> is outermost, so anything nested inside it has to be closed first.
                if (curStrikeThrough) {
                    xhtml.endElement("s");
                    curStrikeThrough = false;
                }
                if (curItalic) {
                    xhtml.endElement("i");
                    curItalic = false;
                }
                if (wantBold) {
                    xhtml.startElement("b");
                } else {
                    xhtml.endElement("b");
                }
                curBold = wantBold;
            }

            boolean wantItalic = cr.isItalic();
            if (wantItalic != curItalic) {
                if (curStrikeThrough) {
                    xhtml.endElement("s");
                    curStrikeThrough = false;
                }
                if (wantItalic) {
                    xhtml.startElement("i");
                } else {
                    xhtml.endElement("i");
                }
                curItalic = wantItalic;
            }

            boolean wantStrike = cr.isStrikeThrough();
            if (wantStrike != curStrikeThrough) {
                if (wantStrike) {
                    xhtml.startElement("s");
                } else {
                    xhtml.endElement("s");
                }
                curStrikeThrough = wantStrike;
            }
        }

        String content = cr.text().replace('\r', '\n');
        // A trailing 0x07 is dropped rather than written out.
        if (content.endsWith("\u0007")) {
            content = content.substring(0, content.length() - 1);
        }
        content = content
                .replace('\u001e', UNICODECHAR_NONBREAKING_HYPHEN)
                .replace('\u001f', UNICODECHAR_ZERO_WIDTH_SPACE);
        xhtml.characters(content);
    }

    /**
     * Handles one field, i.e. the runs between a 0x13 marker and its matching 0x15 marker.
     *
     * <p>Runs before a 0x14 marker are collected as control runs, runs after it as text runs.
     * If the field ends without a 0x14 marker, the collected runs are treated as text instead.
     * When the concatenated control text starts with {@code HYPERLINK} and contains a double
     * quote, the text runs are wrapped in an {@code <a href="...">} element; otherwise the text
     * runs are written directly (pictures included, when control runs are present). Nested
     * fields are handled recursively and written as soon as they are encountered.
     *
     * @param p           paragraph containing the field
     * @param index       index in {@code p} of the run holding the field's 0x13 begin marker
     * @param skipStyling passed through to {@link #handleCharacterRun}
     * @param pictures    picture bookkeeping shared across the whole parse
     * @param xhtml       handler that receives the generated XHTML events
     * @return the distance from {@code index} to the run holding the 0x15 end marker (or to
     *         the end of the paragraph if the field is never closed); the caller adds it to its
     *         own loop index so that its next iteration starts just after the field
     */
    private int handleSpecialCharacterRuns(Paragraph p, int index, boolean skipStyling, PicturesSource pictures, XHTMLContentHandler xhtml) throws SAXException, TikaException, IOException {
        List<CharacterRun> controls = new ArrayList<CharacterRun>();
        List<CharacterRun> texts = new ArrayList<CharacterRun>();
        boolean has14 = false;
        int i;
        for (i = index + 1; i < p.numCharacterRuns(); ++i) {
            CharacterRun cr = p.getCharacterRun(i);
            String runText = cr.text();
            if (runText.equals("\u0013")) {
                // Nested field. The callee expects the index of the begin marker itself (it
                // starts reading at index + 1), exactly as the paragraph-level caller passes it.
                // Passing i + 1 would skip the nested field's first run and leave this loop
                // sitting on the nested end marker, which would then close the outer field.
                i += this.handleSpecialCharacterRuns(p, i, skipStyling, pictures, xhtml);
                continue;
            }
            if (runText.equals("\u0014")) {
                has14 = true;
                continue;
            }
            if (runText.equals("\u0015")) {
                if (!has14) {
                    // No separator seen: what was collected is display text, not control code.
                    texts = controls;
                    controls = new ArrayList<CharacterRun>();
                }
                break;
            }
            if (has14) {
                texts.add(cr);
            } else {
                controls.add(cr);
            }
        }

        if (controls.isEmpty()) {
            for (CharacterRun cr : texts) {
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
            return i - index;
        }

        StringBuilder controlText = new StringBuilder();
        for (CharacterRun control : controls) {
            controlText.append(control.text());
        }
        String text = controlText.toString();

        int firstQuote = text.indexOf('"');
        if (text.startsWith("HYPERLINK") && firstQuote > -1) {
            int lastQuote = text.lastIndexOf('"');
            // With a single quote only, first and last coincide; take the rest of the text
            // instead of asking substring for a negative-length range.
            String url = lastQuote > firstQuote
                    ? text.substring(firstQuote + 1, lastQuote)
                    : text.substring(firstQuote + 1);
            xhtml.startElement("a", "href", url);
            for (CharacterRun cr : texts) {
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
            xhtml.endElement("a");
        } else {
            for (CharacterRun cr : texts) {
                if (pictures.hasPicture(cr)) {
                    Picture picture = pictures.getFor(cr);
                    this.handlePictureCharacterRun(cr, picture, pictures, xhtml);
                    continue;
                }
                this.handleCharacterRun(cr, skipStyling, xhtml);
            }
        }
        return i - index;
    }

    /**
     * Writes an {@code <img>} element for a picture and, the first time the picture is seen,
     * passes its bytes on as an embedded resource.
     *
     * <p>Nothing happens when {@code picture} is null or when {@code cr} is a run marked as
     * deleted. The file name is {@code image<N>} plus the extension POI suggests (if any),
     * where N is the number reported by {@code pictures.pictureNumber(picture)}.
     *
     * @param cr       run the picture is attached to, or null when there is none
     * @param picture  picture to write; may be null
     * @param pictures picture bookkeeping used for numbering and for output de-duplication
     * @param xhtml    handler that receives the generated XHTML events
     */
    private void handlePictureCharacterRun(CharacterRun cr, Picture picture, PicturesSource pictures, XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
        if (picture == null || !isRendered(cr)) {
            return;
        }

        String extension = picture.suggestFileExtension();
        String filename = "image" + pictures.pictureNumber(picture);
        if (extension.length() > 0) {
            filename = filename + "." + extension;
        }
        String mimeType = picture.getMimeType();

        AttributesImpl imgAttributes = new AttributesImpl();
        imgAttributes.addAttribute("", "src", "src", "CDATA", "embedded:" + filename);
        imgAttributes.addAttribute("", "alt", "alt", "CDATA", filename);
        xhtml.startElement("img", imgAttributes);
        xhtml.endElement("img");

        // The image tag is repeated for every reference, the binary content only once.
        if (pictures.hasOutput(picture)) {
            return;
        }
        TikaInputStream content = TikaInputStream.get((byte[])picture.getContent());
        handleEmbeddedResource(content, filename, mimeType, xhtml, false);
        pictures.recordOutput(picture);
    }

    /**
     * Writes {@code text} as a single paragraph inside {@code <div class="section">}.
     * Null or empty text produces no output at all.
     *
     * @param xhtml   handler that receives the generated XHTML events
     * @param section value of the div's {@code class} attribute
     * @param text    text to write; may be null
     */
    private void addTextIfAny(XHTMLContentHandler xhtml, String section, String text) throws SAXException {
        if (text == null || text.length() == 0) {
            return;
        }
        xhtml.startElement("div", "class", section);
        xhtml.element("p", text);
        xhtml.endElement("div");
    }

    /**
     * Parses an old-format Word document held in the given POIFS file system by delegating to
     * {@link #parseWord6(DirectoryNode, XHTMLContentHandler)} with the file system's root directory.
     *
     * @param filesystem file system whose root directory holds the document
     * @param xhtml      handler that receives the generated XHTML events
     */
    protected void parseWord6(NPOIFSFileSystem filesystem, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        DirectoryNode rootDirectory = filesystem.getRoot();
        parseWord6(rootDirectory, xhtml);
    }

    /**
     * Parses an old-format Word document stored under {@code root}, writing each paragraph
     * returned by POI's {@code Word6Extractor} as a {@code <p>} element.
     *
     * @param root  directory node containing the document streams
     * @param xhtml handler that receives the generated XHTML events
     */
    protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
        Word6Extractor word6 = new Word6Extractor(new HWPFOldDocument(root));
        String[] paragraphs = word6.getParagraphText();
        for (int k = 0; k < paragraphs.length; ++k) {
            xhtml.element("p", paragraphs[k]);
        }
    }

    /**
     * Maps a Word paragraph style name to an XHTML tag and an optional CSS class.
     *
     * <ul>
     *   <li>A null or empty name gives a plain {@code <p>}.</li>
     *   <li>Names registered in the fixed style table are returned as registered.</li>
     *   <li>"Table Contents" inside a table gives a plain {@code <p>}.</li>
     *   <li>Names starting with "heading" or "Heading" give {@code h1}..{@code h6}; the level
     *       is the number formed by the trailing digits of the name, clamped to 1..6, and
     *       defaults to 1 when the name has no trailing digits.</li>
     *   <li>Anything else gives {@code <p>} with a class derived from the name: spaces become
     *       underscores and the first character is lower-cased.</li>
     * </ul>
     *
     * @param styleName style name as stored in the document; may be null or empty
     * @param isTable   whether the paragraph is inside a table
     * @return the tag/class pair to use; never null
     */
    public static TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable) {
        if (styleName == null || styleName.length() == 0) {
            return defaultParagraphStyle;
        }
        TagAndStyle fixed = fixedParagraphStyles.get(styleName);
        if (fixed != null) {
            return fixed;
        }
        if (isTable && styleName.equals("Table Contents")) {
            return defaultParagraphStyle;
        }

        if (styleName.startsWith("heading") || styleName.startsWith("Heading")) {
            // Take every trailing digit, not just the last one, so "Heading 10" reads as 10.
            int digitsFrom = styleName.length();
            while (digitsFrom > 0 && Character.isDigit(styleName.charAt(digitsFrom - 1))) {
                --digitsFrom;
            }
            int level = 1;
            if (digitsFrom < styleName.length()) {
                try {
                    level = Integer.parseInt(styleName.substring(digitsFrom));
                }
                catch (NumberFormatException tooLarge) {
                    // Digits only, yet not an int: the number is far above 6 in any case.
                    level = 6;
                }
            }
            // Only h1..h6 exist; this also keeps "Heading 0" from turning into "h0".
            level = Math.max(1, Math.min(level, 6));
            return new TagAndStyle("h" + level, null);
        }

        String styleClass = styleName.replace(' ', '_');
        // Fixed locale so that the class name does not depend on the JVM's default locale.
        styleClass = styleClass.substring(0, 1).toLowerCase(java.util.Locale.ENGLISH) + styleClass.substring(1);
        return new TagAndStyle("p", styleClass);
    }

    /**
     * Tells whether a run should produce output: true for a null run and for any run that is
     * not marked as deleted.
     */
    private boolean isRendered(CharacterRun cr) {
        if (cr == null) {
            return true;
        }
        return !cr.isMarkedDeleted();
    }

    // Registers the style names whose tag/class mapping is fixed rather than derived.
    static {
        // Styles written as a plain <p>.
        for (String plainName : new String[] {"Default", "Normal"}) {
            fixedParagraphStyles.put(plainName, defaultParagraphStyle);
        }
        // Bare heading style names (no number) are top-level headings.
        for (String headingName : new String[] {"heading", "Heading"}) {
            fixedParagraphStyles.put(headingName, new TagAndStyle("h1", null));
        }
        fixedParagraphStyles.put("Title", new TagAndStyle("h1", "title"));
        fixedParagraphStyles.put("Subtitle", new TagAndStyle("h2", "subtitle"));
        fixedParagraphStyles.put("HTML Preformatted", new TagAndStyle("pre", null));
    }

    /**
     * Bookkeeping for the pictures of one document: looks pictures up by character run,
     * numbers them, remembers which ones have already been written as embedded resources, and
     * hands out, one at a time, the pictures that no character run points at.
     */
    private static class PicturesSource {
        private final PicturesTable picturesTable;
        // Pictures whose content has already been passed on as an embedded resource.
        private final Set<Picture> output = new HashSet<Picture>();
        // Picture keyed by its start offset; matched against a run's picture offset in getFor.
        private final Map<Integer, Picture> lookup;
        // Copy of "all" in which every picture that some character run points at is replaced
        // by null; the remaining entries are what nextUnclaimed hands out.
        private final List<Picture> nonU1based;
        // Every picture of the document, in the order returned by the picture table.
        private final List<Picture> all;
        // Read position of nextUnclaimed within nonU1based.
        private int pn = 0;

        private PicturesSource(HWPFDocument doc) {
            this.picturesTable = doc.getPicturesTable();
            this.all = this.picturesTable.getAllPictures();

            this.lookup = new HashMap<Integer, Picture>();
            for (Picture p : this.all) {
                this.lookup.put(p.getStartOffset(), p);
            }

            this.nonU1based = new ArrayList<Picture>(this.all);
            Range r = doc.getRange();
            for (int i = 0; i < r.numCharacterRuns(); ++i) {
                CharacterRun cr = r.getCharacterRun(i);
                if (!this.picturesTable.hasPicture(cr)) continue;
                Picture p = this.getFor(cr);
                if (p == null) {
                    // The run's offset matches no known picture: nothing to claim.
                    continue;
                }
                int at = this.nonU1based.indexOf(p);
                // -1 when an earlier run already claimed this picture; set(-1, ...) would throw.
                if (at >= 0) {
                    this.nonU1based.set(at, null);
                }
            }
        }

        /** Tells whether the picture table reports a picture for the given run. */
        private boolean hasPicture(CharacterRun cr) {
            return this.picturesTable.hasPicture(cr);
        }

        /** Remembers that the picture's content has been written. */
        private void recordOutput(Picture picture) {
            this.output.add(picture);
        }

        /** Tells whether {@link #recordOutput} has been called for the picture. */
        private boolean hasOutput(Picture picture) {
            return this.output.contains(picture);
        }

        /** Returns the 1-based position of the picture among all pictures (0 if unknown). */
        private int pictureNumber(Picture picture) {
            return this.all.indexOf(picture) + 1;
        }

        /** Returns the picture starting at the run's picture offset, or null if there is none. */
        private Picture getFor(CharacterRun cr) {
            return this.lookup.get(cr.getPicOffset());
        }

        /**
         * Returns the next picture that no character run points at, or null once all of them
         * have been handed out. Each picture is returned at most once.
         */
        private Picture nextUnclaimed() {
            while (this.pn < this.nonU1based.size()) {
                Picture p = this.nonU1based.get(this.pn);
                ++this.pn;
                if (p != null) {
                    return p;
                }
            }
            return null;
        }
    }

    /**
     * Immutable pair of an XHTML tag name and an optional CSS class name.
     */
    public static class TagAndStyle {
        private final String tag;
        private final String styleClass;

        /**
         * @param tag        tag name, e.g. {@code "p"} or {@code "h1"}
         * @param styleClass class name, or null for no class
         */
        public TagAndStyle(String tag, String styleClass) {
            this.tag = tag;
            this.styleClass = styleClass;
        }

        /** Returns the tag name given at construction. */
        public String getTag() {
            return tag;
        }

        /** Returns the class name given at construction; may be null. */
        public String getStyleClass() {
            return styleClass;
        }

        /** Returns true when the tag is exactly two characters long and begins with 'h'. */
        public boolean isHeading() {
            if (tag.length() != 2) {
                return false;
            }
            return tag.charAt(0) == 'h';
        }
    }
}

