/*
 * Copyright 2004-2005 The Trix Development Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.trix.cuery;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.trix.cuery.filter.Filter;
import org.trix.cuery.parser.CueryParser;
import org.trix.cuery.util.CSSUtil;
import org.trix.cuery.util.DOMUtil;

import org.w3c.css.sac.DescendantSelector;
import org.w3c.css.sac.Selector;
import org.w3c.css.sac.SiblingSelector;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import org.xml.sax.SAXException;

/**
 * <p>
 * In CSS, pattern matching rules determine which style rules apply to elements in the document
 * tree. These patterns, called selectors, may range from simple element names to rich contextual
 * patterns. If all conditions in the pattern are true for a certain element, the selector matches
 * the element.
 * </p>
 * <p>
 * The following table summarizes Selector syntax:
 * </p>
 * <table class="selectorsreview" border="1" width="100%"> <tbody>
 * <tr>
 * <th class="pattern">Pattern</th>
 * <th class="meaning">Meaning</th>
 * <th class="origin">First defined in CSS level</th>
 * </tr>
 * <tr>
 * <td class="pattern">*</td>
 * <td class="meaning">any element</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E</td>
 * <td class="meaning">an element of type E</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo]</td>
 * <td class="meaning">an E element with a "foo" attribute</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo="bar"]</td>
 * <td class="meaning">an E element whose "foo" attribute value is exactly equal to "bar"</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo~="bar"]</td>
 * <td class="meaning">an E element whose "foo" attribute value is a list of space-separated
 * values, one of which is exactly equal to "bar"</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo^="bar"]</td>
 * <td class="meaning">an E element whose "foo" attribute value begins exactly with the string
 * "bar"</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo$="bar"]</td>
 * <td class="meaning">an E element whose "foo" attribute value ends exactly with the string "bar"</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[foo*="bar"]</td>
 * <td class="meaning">an E element whose "foo" attribute value contains the substring "bar"</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E[hreflang|="en"]</td>
 * <td class="meaning">an E element whose "hreflang" attribute has a hyphen-separated list of
 * values beginning (from the left) with "en"</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:root</td>
 * <td class="meaning">an E element, root of the document</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:nth-child(n)</td>
 * <td class="meaning">an E element, the n-th child of its parent</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:nth-last-child(n)</td>
 * <td class="meaning">an E element, the n-th child of its parent, counting from the last one</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:nth-of-type(n)</td>
 * <td class="meaning">an E element, the n-th sibling of its type</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:nth-last-of-type(n)</td>
 * <td class="meaning">an E element, the n-th sibling of its type, counting from the last one</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:first-child</td>
 * <td class="meaning">an E element, first child of its parent</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:last-child</td>
 * <td class="meaning">an E element, last child of its parent</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:first-of-type</td>
 * <td class="meaning">an E element, first sibling of its type</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:last-of-type</td>
 * <td class="meaning">an E element, last sibling of its type</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:only-child</td>
 * <td class="meaning">an E element, only child of its parent</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:only-of-type</td>
 * <td class="meaning">an E element, only sibling of its type</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:empty</td>
 * <td class="meaning">an E element that has no children (including text nodes)</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:link <br>
 * E:visited</td>
 * <td class="meaning">an E element being the source anchor of a hyperlink of which the target is
 * not yet visited (:link) or already visited (:visited)</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:active <br>
 * E:hover <br>
 * E:focus</td>
 * <td class="meaning">an E element during certain user actions</td>
 * <td class="origin">1 and 2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:target</td>
 * <td class="meaning">an E element being the target of the referring URI</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:lang(fr)</td>
 * <td class="meaning">an element of type E in language "fr" (the document language specifies how
 * language is determined)</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:enabled<br>
 * E:disabled&nbsp;</td>
 * <td class="meaning">a user interface element E which is enabled or disabled</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:checked<br>
 * E:indeterminate&nbsp;</td>
 * <td class="meaning">a user interface element E which is checked or in an indeterminate state
 * (for instance a radio-button or checkbox)</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:contains("foo")</td>
 * <td class="meaning">an E element containing the substring "foo" in its textual contents</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E::first-line</td>
 * <td class="meaning">the first formatted line of an E element</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E::first-letter</td>
 * <td class="meaning">the first formatted letter of an E element</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E::selection</td>
 * <td class="meaning">the portion of an E element that is currently selected/highlighted by the
 * user</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E::before</td>
 * <td class="meaning">generated content before an E element</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E::after</td>
 * <td class="meaning">generated content after an E element</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E.warning</td>
 * <td class="meaning">an E element whose class is "warning" (the document language specifies how
 * class is determined).</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E#myid</td>
 * <td class="meaning">an E element with ID equal to "myid".</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E:not(s)</td>
 * <td class="meaning">an E element that does not match simple selector s</td>
 * <td class="origin">3</td>
 * </tr>
 * <tr>
 * <td class="pattern">E F</td>
 * <td class="meaning">an F element descendant of an E element</td>
 * <td class="origin">1</td>
 * </tr>
 * <tr>
 * <td class="pattern">E &gt; F</td>
 * <td class="meaning">an F element child of an E element</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E + F</td>
 * <td class="meaning">an F element immediately preceded by an E element</td>
 * <td class="origin">2</td>
 * </tr>
 * <tr>
 * <td class="pattern">E ~ F</td>
 * <td class="meaning">an F element preceded by an E element</td>
 * <td class="origin">3</td>
 * </tr>
 * </tbody></table>
 * <p>
 * The meaning of each selector is derived from the table above by prepending "matches" to the
 * contents of each cell of the "Meaning" column.
 * </p>
 * 
 * @author <a href="mailto:Teletha.T@gmail.com">Teletha Testarossa</a>
 * @version $ Id: CSSQuery.java,v 1.09 2005/11/17 06:38:13 Teletha Exp $
 */
public class CSSQuery {

    /** The css parser, shared by every CSSQuery instance. */
    private static final CueryParser PARSER = new CueryParser();

    /** The root document that {@link #select(Selector)} searches. */
    private final Document document;

    /**
     * Create CSSQuery instance.
     * 
     * @param path A path to the target xml document to parse.
     * @throws IOException If the file cannot be read or cannot be parsed as xml.
     */
    public CSSQuery(String path) throws IOException {
        this(new File(path));
    }

    /**
     * Create CSSQuery instance. The file is parsed with a namespace aware parser.
     * 
     * @param file A target xml document to parse.
     * @throws IOException If the file cannot be read or cannot be parsed as xml. Parser
     *             configuration errors and SAX errors are reported as an IOException which
     *             carries the original exception as its cause.
     */
    public CSSQuery(File file) throws IOException {
        // NOTE(review): apart from namespace awareness the factory keeps its default settings. If
        // documents can come from untrusted sources, consider disabling DTDs and external entities.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);

        try {
            DocumentBuilder builder = factory.newDocumentBuilder();

            this.document = builder.parse(file);
        } catch (ParserConfigurationException e) {
            throw toIOException(e);
        } catch (SAXException e) {
            throw toIOException(e);
        }
    }

    /**
     * Create CSSQuery instance.
     * 
     * @param document A root document to parse.
     * @throws IllegalArgumentException If the document is null.
     */
    public CSSQuery(Document document) {
        // assert null
        if (document == null) {
            throw new IllegalArgumentException("The target document is null.");
        }
        this.document = document;
    }

    /**
     * Check whether the element matches the css selector expression. The expression is parsed
     * and then evaluated by {@link #match(Element, Selector)}.
     * 
     * @param element A target element to test.
     * @param expression A css selector expression.
     * @return True if the element is accepted by the selector, false otherwise.
     */
    public boolean match(Element element, String expression) {
        return match(element, PARSER.parseSelector(expression));
    }

    /**
     * Check whether the element matches the css selector. The selector is converted to a filter
     * and the element is tested against it. The root document of this query is not consulted.
     * 
     * @param element A target element to test.
     * @param selector A css selector.
     * @return True if the element is accepted by the selector, false if it is rejected or if
     *         either argument is null.
     */
    public boolean match(Element element, Selector selector) {
        // assert null
        if (selector == null || element == null) {
            return false;
        }

        Filter filter = CSSUtil.convert(selector);
        return filter.accept(element);
    }

    /**
     * Query css selector and retrieve elements from the document.
     * 
     * @param expression A css selector expression.
     * @return All matched elements.
     */
    public Set select(String expression) {
        return select(PARSER.parseSelector(expression));
    }

    /**
     * Query css selector and retrieve elements from the document.
     * 
     * @param selector A css selector.
     * @return All matched elements. An empty set if the selector is null.
     */
    public Set select(Selector selector) {
        // A null selector matches nothing, in the same way as match(Element, Selector).
        if (selector == null) {
            return Collections.EMPTY_SET;
        }
        return query(document, selector);
    }

    /**
     * Wrap a parsing failure in an IOException without losing the original exception.
     * 
     * @param cause An exception thrown while creating the parser or while parsing.
     * @return An IOException with the same message and with the given exception as its cause.
     */
    private static IOException toIOException(Exception cause) {
        IOException exception = new IOException(cause.getMessage());

        // initCause is used instead of a (message, cause) constructor to stay compatible with the
        // older java versions this file is written for.
        exception.initCause(cause);
        return exception;
    }

    /**
     * Query css selector and retrieve elements. Combinator selectors are resolved recursively,
     * every other selector type is converted to a filter and applied from the source node.
     * 
     * @param source A source to start parsing.
     * @param selector A css selector.
     * @return All matched elements.
     */
    private Set query(Node source, Selector selector) {
        switch (selector.getSelectorType()) {
        case Filter.SAC_CHILD_SELECTOR:
            return queryContextual(source, (DescendantSelector) selector, false);

        case Filter.SAC_DESCENDANT_SELECTOR:
            return queryContextual(source, (DescendantSelector) selector, true);

        case Filter.SAC_DIRECT_ADJACENT_SELECTOR:
            return querySibling(source, (SiblingSelector) selector, false);

        case Filter.SAC_INDIRECT_ADJACENT_SELECTOR:
            return querySibling(source, (SiblingSelector) selector, true);

        default:
            return DOMUtil.retrieveElements(source, CSSUtil.convert(selector), true);
        }
    }

    /**
     * Query child selector or descendant selector and retrieve elements. The ancestor part of
     * the selector is resolved first, then the simple selector is applied below each ancestor.
     * 
     * @param source A source to start parsing.
     * @param selector A css selector.
     * @param descendant The flag handed to DOMUtil.retrieveElements as its third argument. False
     *            for a child selector, true for a descendant selector. Presumably it controls
     *            whether the search goes deeper than direct children (confirm against DOMUtil).
     * @return All matched elements.
     */
    private Set queryContextual(Node source, DescendantSelector selector, boolean descendant) {
        Set ancestors = query(source, selector.getAncestorSelector());

        // check size
        if (ancestors.size() == 0) {
            return Collections.EMPTY_SET;
        }

        Set container = new HashSet();
        Iterator iterator = ancestors.iterator();
        Filter filter = CSSUtil.convert(selector.getSimpleSelector());

        // retrieve
        while (iterator.hasNext()) {
            Element ancestor = (Element) iterator.next();
            container.addAll(DOMUtil.retrieveElements(ancestor, filter, descendant));
        }
        return container;
    }

    /**
     * Query direct adjacent selector or indirect adjacent selector and retrieve elements. The
     * leading part of the selector is resolved first, then the sibling selector is tested against
     * the elements which DOMUtil.getNextElement reports after each result.
     * 
     * @param source A source to start parsing.
     * @param selector A css selector.
     * @param indirect False to test only the first next element of each result (direct adjacent
     *            selector), true to keep walking through every following next element (indirect
     *            adjacent selector).
     * @return All matched elements.
     */
    private Set querySibling(Node source, SiblingSelector selector, boolean indirect) {
        Set precedings = query(source, selector.getSelector());

        // check size
        if (precedings.size() == 0) {
            return Collections.EMPTY_SET;
        }

        Set container = new HashSet();
        Iterator iterator = precedings.iterator();
        Filter filter = CSSUtil.convert(selector.getSiblingSelector());

        // retrieve
        while (iterator.hasNext()) {
            Element next = DOMUtil.getNextElement((Element) iterator.next());

            while (next != null) {
                if (filter.accept(next)) {
                    container.add(next);
                }

                // a direct adjacent selector looks at the immediate next element only
                if (!indirect) {
                    break;
                }
                next = DOMUtil.getNextElement(next);
            }
        }
        return container;
    }
}
