/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.sax;

import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.util.UnicodeTokenizer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * SAX {@link ContentHandler} that converts HTML parse events into a list of
 * {@link TextBlock}s and an optional document title.
 *
 * <p>Character data is accumulated in two parallel buffers: a text buffer holding the
 * block's text, and a token buffer that additionally carries marker tokens around anchor
 * text so that linked words can be counted. Elements without a registered
 * {@link TagAction} request a flush; the flush happens when the next character data
 * arrives, when the {@code BODY} element ends, at {@link #endDocument()}, or when
 * {@link #toTextDocument()} is called.
 *
 * <p>Instances hold mutable parsing state and nothing in this class synchronises access
 * to it.
 */
public class BoilerpipeHTMLContentHandler
implements ContentHandler {
    /** Marker token written to the token buffer where anchor text starts. */
    private static final String ANCHOR_TEXT_START = "$\ue00a<";
    /** Marker token written to the token buffer where anchor text ends. */
    private static final String ANCHOR_TEXT_END = ">\ue00a$";
    /** Line width used when simulating word wrapping of a block. */
    private static final int MAX_LINE_LENGTH = 80;
    /** A token counts as a word if it contains at least one letter or number character. */
    private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern.compile("[\\p{L}\\p{Nd}\\p{Nl}\\p{No}]");

    /** Action for elements whose character content is dropped entirely. */
    private static final TagAction TA_IGNORABLE_ELEMENT = new TagAction(){

        @Override
        public boolean start(BoilerpipeHTMLContentHandler instance, String localName) {
            instance.inIgnorableElement++;
            return true;
        }

        @Override
        public boolean end(BoilerpipeHTMLContentHandler instance, String localName) {
            instance.inIgnorableElement--;
            return true;
        }
    };

    /** Action for {@code A} elements: brackets the anchor text with marker tokens. */
    private static final TagAction TA_ANCHOR_TEXT = new TagAction(){

        @Override
        public boolean start(BoilerpipeHTMLContentHandler instance, String localName) throws SAXException {
            boolean nested = instance.inAnchor != 0;
            // The counter is incremented even when the element turns out to be nested.
            instance.inAnchor++;
            if (nested) {
                throw new SAXException("SAX input contains nested A elements -- You have probably hit a bug in NekoHTML (#2909310). Please clean the HTML externally and feed it to boilerpipe again");
            }
            if (instance.inIgnorableElement == 0) {
                instance.addWhitespaceIfNecessary();
                // The marker goes to the token buffer only; it must not show up in the text.
                instance.tokenBuffer.append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_START);
                instance.tokenBuffer.append(' ');
                instance.sbLastWasWhitespace = true;
            }
            return false;
        }

        @Override
        public boolean end(BoilerpipeHTMLContentHandler instance, String localName) {
            instance.inAnchor--;
            if (instance.inAnchor == 0 && instance.inIgnorableElement == 0) {
                instance.addWhitespaceIfNecessary();
                instance.tokenBuffer.append(BoilerpipeHTMLContentHandler.ANCHOR_TEXT_END);
                instance.tokenBuffer.append(' ');
                instance.sbLastWasWhitespace = true;
            }
            return false;
        }
    };

    /** Action for {@code BODY}: tracks body nesting and flushes pending text when it ends. */
    private static final TagAction TA_BODY = new TagAction(){

        @Override
        public boolean start(BoilerpipeHTMLContentHandler instance, String localName) {
            instance.inBody++;
            return false;
        }

        @Override
        public boolean end(BoilerpipeHTMLContentHandler instance, String localName) {
            // Flush while still counted as inside the body so the text becomes a block.
            instance.flushBlock();
            instance.inBody--;
            return false;
        }
    };

    /** Action for inline elements: separates their content by whitespace, no block break. */
    private static final TagAction TA_INLINE = new TagAction(){

        @Override
        public boolean start(BoilerpipeHTMLContentHandler instance, String localName) {
            instance.addWhitespaceIfNecessary();
            return false;
        }

        @Override
        public boolean end(BoilerpipeHTMLContentHandler instance, String localName) {
            // Record the appended space in the whitespace flag (as start() does) so that
            // following text with leading whitespace does not add a second space.
            instance.addWhitespaceIfNecessary();
            return false;
        }
    };

    /** Element name (upper- and lower-case spelling) to the action applied to it. */
    private static final Map<String, TagAction> TAG_ACTIONS = new HashMap<String, TagAction>();

    static {
        String[] ignorable = {"STYLE", "SCRIPT", "OPTION", "OBJECT", "EMBED", "APPLET"};
        for (String tag : ignorable) {
            BoilerpipeHTMLContentHandler.addTagAction(TAG_ACTIONS, tag, TA_IGNORABLE_ELEMENT);
        }
        BoilerpipeHTMLContentHandler.addTagAction(TAG_ACTIONS, "A", TA_ANCHOR_TEXT);
        BoilerpipeHTMLContentHandler.addTagAction(TAG_ACTIONS, "BODY", TA_BODY);
        String[] inline = {"STRIKE", "U", "B", "I", "EM", "STRONG", "SPAN", "ABBR", "ACRONYM"};
        for (String tag : inline) {
            BoilerpipeHTMLContentHandler.addTagAction(TAG_ACTIONS, tag, TA_INLINE);
        }
    }

    /** Text of the current block plus anchor marker tokens; input to the tokenizer. */
    private StringBuilder tokenBuffer = new StringBuilder();
    /** Text of the current block as it will be stored in the {@link TextBlock}. */
    private StringBuilder textBuffer = new StringBuilder();
    /** Nesting depth of {@code BODY} elements; text is only turned into blocks when non-zero. */
    private int inBody = 0;
    /** Nesting depth of {@code A} elements. */
    private int inAnchor = 0;
    /** Nesting depth of ignorable elements; character data is dropped when non-zero. */
    private int inIgnorableElement = 0;
    /** Whether the most recent buffer content is whitespace, used to collapse runs of spaces. */
    private boolean sbLastWasWhitespace = false;
    /** Running index of {@link #characters} calls, recorded per block in a {@link BitSet}. */
    private int textElementIdx = 0;
    private final List<TextBlock> textBlocks = new ArrayList<TextBlock>();
    /** Local name of the most recently started element. */
    private String lastStartTag = null;
    /** Local name of the most recently ended element (recorded, not read in this class). */
    private String lastEndTag = null;
    /** Kind of the most recent event (recorded, not read in this class). */
    private Event lastEvent = null;
    /** Set when the current block must be flushed before more text is accepted. */
    private boolean flush = false;
    /** Number of blocks emitted so far; passed to each new {@link TextBlock}. */
    private int offsetBlocks = 0;
    /** Indices of the text elements that contributed to the current block. */
    private BitSet currentContainedTextElements = new BitSet();
    /** Whether the tokenizer loop is currently between anchor markers; survives across blocks. */
    boolean inAnchorText = false;
    private String title = null;

    /**
     * Flushes whatever text is still buffered at the end of the document.
     */
    @Override
    public void endDocument() throws SAXException {
        this.flushBlock();
    }

    /** No-op. */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Treats ignorable whitespace as a single separating space.
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        this.addWhitespaceIfNecessary();
    }

    /** No-op. */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
    }

    /** No-op. */
    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /** No-op. */
    @Override
    public void skippedEntity(String name) throws SAXException {
    }

    /** No-op. */
    @Override
    public void startDocument() throws SAXException {
    }

    /** No-op. */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    /**
     * Applies the registered {@link TagAction} for the element, if any. Elements without
     * a registered action always request a flush of the current block.
     *
     * @throws SAXException if the tag action rejects the element (nested anchors)
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        TagAction action = TAG_ACTIONS.get(localName);
        if (action == null || action.start(this, localName)) {
            this.flush = true;
        }
        this.lastEvent = Event.START_TAG;
        this.lastStartTag = localName;
    }

    /**
     * Applies the registered {@link TagAction} for the closing element, if any. Elements
     * without a registered action always request a flush of the current block.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        TagAction action = TAG_ACTIONS.get(localName);
        if (action == null || action.end(this, localName)) {
            this.flush = true;
        }
        this.lastEvent = Event.END_TAG;
        this.lastEndTag = localName;
    }

    /**
     * Appends character data to the current block. Every whitespace character (per
     * {@link Character#isWhitespace(char)}) is stored as a plain space, and leading and
     * trailing whitespace collapses to at most one space each.
     *
     * <p>The supplied array is only read, never modified.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character to use
     * @param length number of characters to use
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        ++this.textElementIdx;
        if (this.flush) {
            this.flushBlock();
            this.flush = false;
        }
        if (this.inIgnorableElement != 0 || length == 0) {
            return;
        }

        // Trim whitespace from both ends, remembering whether there was any.
        int from = start;
        int to = start + length;
        boolean startWhitespace = false;
        boolean endWhitespace = false;
        while (from < to && Character.isWhitespace(ch[from])) {
            startWhitespace = true;
            ++from;
        }
        while (to > from && Character.isWhitespace(ch[to - 1])) {
            endWhitespace = true;
            --to;
        }

        if (from == to) {
            // Input was non-empty and consisted of whitespace only.
            this.addWhitespaceIfNecessary();
            this.lastEvent = Event.WHITESPACE;
            return;
        }

        if (startWhitespace && !this.sbLastWasWhitespace) {
            this.textBuffer.append(' ');
            this.tokenBuffer.append(' ');
        }
        for (int i = from; i < to; ++i) {
            // Normalise inner whitespace (tabs, newlines, ...) to a space while copying.
            char c = Character.isWhitespace(ch[i]) ? ' ' : ch[i];
            this.textBuffer.append(c);
            this.tokenBuffer.append(c);
        }
        if (endWhitespace) {
            this.textBuffer.append(' ');
            this.tokenBuffer.append(' ');
        }
        this.sbLastWasWhitespace = endWhitespace;
        this.lastEvent = Event.CHARACTERS;
        this.currentContainedTextElements.set(this.textElementIdx);
    }

    /**
     * Returns the live list of blocks collected so far (not a copy).
     */
    List<TextBlock> getTextBlocks() {
        return this.textBlocks;
    }

    /**
     * Appends one space to both buffers unless the last buffered content already is
     * whitespace, and marks the buffers as ending in whitespace.
     */
    private void addWhitespaceIfNecessary() {
        if (!this.sbLastWasWhitespace) {
            this.tokenBuffer.append(' ');
            this.textBuffer.append(' ');
            this.sbLastWasWhitespace = true;
        }
    }

    /** Empties the text and token buffers together so they cannot drift apart. */
    private void clearBuffers() {
        this.textBuffer.setLength(0);
        this.tokenBuffer.setLength(0);
    }

    /**
     * Turns the buffered text into a {@link TextBlock} and resets the buffers.
     *
     * <p>Outside of {@code BODY} no block is created; the buffered text becomes the title
     * if the last started element was {@code TITLE} (compared case-insensitively), and
     * is discarded otherwise. Inside {@code BODY}, a block is created only if the
     * buffered text contains at least one token besides the anchor markers.
     */
    private void flushBlock() {
        if (this.inBody == 0) {
            if ("TITLE".equalsIgnoreCase(this.lastStartTag)) {
                this.setTitle(this.tokenBuffer.toString().trim());
            }
            // Drop the text buffer too, otherwise head/title text would be prepended
            // to the first block created inside the body.
            this.clearBuffers();
            return;
        }

        int buffered = this.tokenBuffer.length();
        if (buffered == 0) {
            return;
        }
        if (buffered == 1 && this.sbLastWasWhitespace) {
            // Only a separator space is pending; nothing worth a block.
            this.clearBuffers();
            return;
        }

        int numWords = 0;
        int numLinkedWords = 0;
        int numWrappedLines = 0;
        int numTokens = 0;
        int numWordsCurrentLine = 0;
        // Starts at -1 to offset the +1 separator added for the first word of a line.
        int currentLineLength = -1;
        for (String token : UnicodeTokenizer.tokenize(this.tokenBuffer)) {
            if (ANCHOR_TEXT_START.equals(token)) {
                this.inAnchorText = true;
                continue;
            }
            if (ANCHOR_TEXT_END.equals(token)) {
                this.inAnchorText = false;
                continue;
            }
            ++numTokens;
            if (!BoilerpipeHTMLContentHandler.isWord(token)) {
                continue;
            }
            ++numWords;
            ++numWordsCurrentLine;
            if (this.inAnchorText) {
                ++numLinkedWords;
            }
            int tokenLength = token.length();
            currentLineLength += tokenLength + 1;
            if (currentLineLength > MAX_LINE_LENGTH) {
                // The word does not fit; it opens the next simulated line.
                ++numWrappedLines;
                currentLineLength = tokenLength;
                numWordsCurrentLine = 1;
            }
        }
        if (numTokens == 0) {
            return;
        }

        int numWordsInWrappedLines;
        if (numWrappedLines == 0) {
            numWordsInWrappedLines = numWords;
            numWrappedLines = 1;
        } else {
            // Words on the last, unfinished line are not counted.
            numWordsInWrappedLines = numWords - numWordsCurrentLine;
        }

        TextBlock block = new TextBlock(this.textBuffer.toString().trim(), this.currentContainedTextElements, numWords, numLinkedWords, numWordsInWrappedLines, numWrappedLines, this.offsetBlocks);
        // The block keeps the BitSet it was given, so start a fresh one.
        this.currentContainedTextElements = new BitSet();
        ++this.offsetBlocks;
        this.clearBuffers();
        this.textBlocks.add(block);
    }

    /**
     * Returns whether the token contains at least one letter or number character.
     */
    private static boolean isWord(String token) {
        return PAT_VALID_WORD_CHARACTER.matcher(token).find();
    }

    /**
     * Registers {@code action} under both the upper-case and the lower-case spelling of
     * {@code tag}. Case conversion uses a fixed locale so the resulting keys do not
     * depend on the JVM's default locale (e.g. the Turkish dotless i).
     */
    private static void addTagAction(Map<String, TagAction> tagActions, String tag, TagAction action) {
        tagActions.put(tag.toUpperCase(Locale.ENGLISH), action);
        tagActions.put(tag.toLowerCase(Locale.ENGLISH), action);
    }

    /**
     * Returns the document title, or {@code null} if none has been set.
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * Sets the document title. {@code null} and empty strings are ignored, so an
     * existing title is never cleared.
     *
     * @param s the new title
     */
    public void setTitle(String s) {
        if (s == null || s.length() == 0) {
            return;
        }
        this.title = s;
    }

    /**
     * Flushes any pending text and wraps the title and the collected blocks in a
     * {@link TextDocument}.
     *
     * @return a document built from the current title and the live block list
     */
    public TextDocument toTextDocument() {
        this.flushBlock();
        return new TextDocument(this.getTitle(), this.getTextBlocks());
    }

    /** Kind of the most recently processed SAX event. */
    private enum Event {
        START_TAG,
        END_TAG,
        CHARACTERS,
        WHITESPACE;

    }

    /**
     * Behaviour attached to an element name.
     */
    private interface TagAction {
        /**
         * Called when the element starts.
         *
         * @return {@code true} if the current block must be flushed
         */
        public boolean start(BoilerpipeHTMLContentHandler instance, String localName) throws SAXException;

        /**
         * Called when the element ends.
         *
         * @return {@code true} if the current block must be flushed
         */
        public boolean end(BoilerpipeHTMLContentHandler instance, String localName) throws SAXException;
    }
}

