/* 
 * Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
 *
 * This file is part of Rast.
 * See the file COPYING for redistribution information.
 *
 */

#include <apr_strings.h>

#include "rast/filter.h"

/* Value placed in the first field of the rast_filter_module_t that
 * rast_html_filter_module_initialize() registers. */
#define SUPPORTED_FRAMEWORK_VERSION 1

/*
 * Return a pointer to the text that follows the first '=' in mime_type,
 * or NULL when mime_type contains no '='.
 *
 * The result points into mime_type itself; nothing is copied.
 * NOTE(review): this presumably expects a value such as
 * "type/subtype; charset=NAME" in which the first '=' belongs to the
 * charset parameter -- confirm against the callers.
 */
static const char *
mime_type_to_encoding(const char *mime_type)
{
    const char *equals_sign = strchr(mime_type, '=');

    return (equals_sign != NULL) ? equals_sign + 1 : NULL;
}

/*
 * Copy nbytes bytes starting at text into filter->pool and append the copy
 * to *next_brigade as a transient bucket.  The brigade is created from pool
 * the first time it is needed.
 */
static void
append_text_bucket(rast_filter_t *filter, apr_bucket_brigade **next_brigade,
                   const char *text, int nbytes,
                   apr_bucket_alloc_t *bucket_alloc, apr_pool_t *pool)
{
    char *copy;
    apr_bucket *text_bucket;

    copy = apr_pmemdup(filter->pool, text, nbytes);
    text_bucket = apr_bucket_transient_create(copy, nbytes, bucket_alloc);
    if (*next_brigade == NULL) {
        *next_brigade = apr_brigade_create(pool, bucket_alloc);
    }
    APR_BRIGADE_INSERT_TAIL(*next_brigade, text_bucket);
}

/*
 * Scan buf[0 .. buf_nbytes) for "<...>" tags and forward the text between
 * them.
 *
 * - Text runs outside tags are appended to *next_brigade (created on
 *   demand from pool / bucket_alloc).
 * - The first non-empty text run that follows a tag whose name starts with
 *   "title" is not forwarded; it is converted from from_encoding to the
 *   filter's DB encoding and stored as the "title" property instead.
 * - A tag that is still open at the end of buf is dropped.
 *
 * *have_title is reset to 0 on entry, set to 1 when such a title tag is
 * seen, and reset to 0 once the title text has been stored.
 *
 * Returns RAST_OK, or the error reported by rast_convert_encoding().
 */
static rast_error_t *
extract_html(rast_filter_t *filter, apr_bucket_brigade **next_brigade,
             const char *buf, int buf_nbytes, int *have_title,
             const char *from_encoding, apr_bucket_alloc_t *bucket_alloc,
             apr_pool_t *pool)
{
    const char *p, *p_end, *q, *tag_start;
    int nbytes;

    *have_title = 0;
    q = p = buf;
    p_end = buf + buf_nbytes;

    while (p < p_end) {
        /* Advance p to the next '<'; q marks where the text run began. */
        while (*p != '<') {
            p++;
            if (p >= p_end) {
                goto end;
            }
        }

        nbytes = p - q;
        if (nbytes > 0) {
            if (*have_title) {
                char *tmp;
                const char *db_encoding;
                size_t tmp_nbytes;
                rast_value_t title;
                rast_error_t *error;

                db_encoding = rast_filter_db_encoding(filter);
                error = rast_convert_encoding(from_encoding, db_encoding, q,
                                              nbytes, &tmp, &tmp_nbytes,
                                              filter->pool);
                if (error != RAST_OK) {
                    return error;
                }
                /* NOTE(review): assumes the converted buffer has room for
                 * one byte past tmp_nbytes -- confirm against
                 * rast_convert_encoding(). */
                tmp[tmp_nbytes] = '\0';

                rast_value_set_type(&title, RAST_TYPE_STRING);
                rast_value_set_string(&title, tmp);
                rast_filter_set_property(filter, "title", &title);
                *have_title = 0;
            }
            else {
                append_text_bucket(filter, next_brigade, q, nbytes,
                                   bucket_alloc, pool);
            }
        }

        /* Remember where the tag begins so that its name can be inspected
         * once the closing '>' has been found. */
        tag_start = p;
        while (*p != '>') {
            p++;
            if (p >= p_end) {
                /* Unterminated tag: discard it. */
                q = p;
                goto end;
            }
        }
        /*
         * Compare the tag name at the '<' of this tag.  The comparison
         * previously started at q + 1, which points into the preceding text
         * run whenever any text (even whitespace) came before the tag.
         * strncmp() cannot read past p here: the '>' at p never matches a
         * character of "title".
         */
        if (strncmp(tag_start + 1, "title", strlen("title")) == 0) {
            *have_title = 1;
        }

        p++;
        q = p;
    }
 end:
    /* Forward whatever text follows the last complete tag. */
    nbytes = p - q;
    if (nbytes > 0) {
        append_text_bucket(filter, next_brigade, q, nbytes, bucket_alloc,
                           pool);
    }

    return RAST_OK;
}

/*
 * Filter entry point: strip HTML tags from the data in brigade and pass the
 * remaining text on as "text/plain".
 *
 * The source encoding is taken from the text after '=' in mime_type
 * (defaulting to "UTF-8" when there is none) and handed to a
 * rast_encoding_converter together with the candidate encodings below.
 * Each data bucket is fed to the converter, and the converted output is run
 * through extract_html() in chunks of at most sizeof(out_buf) bytes.  On an
 * EOS bucket the text accumulated so far, followed by an EOS bucket, is
 * passed to rast_mime_filter_pass(); anything still pending after the last
 * bucket is passed on as well.
 *
 * All temporary allocations live in a sub-pool of filter->pool which is
 * destroyed on every exit path.
 *
 * Returns RAST_OK on success or the first error encountered.
 */
static rast_error_t *
html_filter_invoke(rast_filter_t *filter, apr_bucket_brigade *brigade,
                   const char *mime_type)
{
    apr_bucket_alloc_t *bucket_alloc;
    apr_bucket *bucket;
    apr_bucket_brigade *next_brigade = NULL;
    apr_pool_t *pool;
    apr_status_t status;
    rast_error_t *error = RAST_OK;
    const char *candidate_encodings[] = {
        "UTF-8",
        "EUC-JP",
        NULL
    };
    const char *from_encoding;
    rast_encoding_converter_t *converter;

    apr_pool_create(&pool, filter->pool);
    bucket_alloc = apr_bucket_alloc_create(pool);

    from_encoding = mime_type_to_encoding(mime_type);
    if (from_encoding == NULL) {
        from_encoding = "UTF-8";
    }
    error = rast_encoding_converter_create(&converter, from_encoding,
                                           candidate_encodings,
                                           pool);
    if (error != RAST_OK) {
        goto cleanup;
    }

    for (bucket = APR_BRIGADE_FIRST(brigade);
         bucket != APR_BRIGADE_SENTINEL(brigade);
         bucket = APR_BUCKET_NEXT(bucket)) {
        const char *buf;
        char out_buf[1024];
        apr_size_t buf_nbytes;
        int have_title, out_buf_nbytes;

        if (APR_BUCKET_IS_EOS(bucket)) {
            apr_bucket *next_bucket;

            /* Terminate the outgoing brigade and hand it downstream. */
            next_bucket = apr_bucket_eos_create(bucket_alloc);
            if (next_brigade == NULL) {
                next_brigade = apr_brigade_create(pool, bucket_alloc);
            }
            APR_BRIGADE_INSERT_TAIL(next_brigade, next_bucket);
            error = rast_mime_filter_pass(filter, next_brigade,
                                          "text/plain", NULL);
            apr_brigade_destroy(next_brigade);
            next_brigade = NULL;
            if (error != RAST_OK) {
                goto cleanup;
            }
            continue;
        }

        status = apr_bucket_read(bucket, &buf, &buf_nbytes, APR_BLOCK_READ);
        if (status != APR_SUCCESS) {
            /* This path used to return without destroying the sub-pool. */
            error = apr_status_to_rast_error(status);
            goto cleanup;
        }

        error = rast_encoding_converter_add_text(converter, buf, buf_nbytes);
        if (error != RAST_OK) {
            goto cleanup;
        }

        do {
            /*
             * Offer the whole buffer on every pass.  The length is passed
             * by pointer and holds the produced byte count afterwards, so
             * without this reset a later pass would see the previous
             * output length instead of the buffer capacity.
             * NOTE(review): assumes the argument is in/out; the reset is
             * harmless if it is output-only.
             */
            out_buf_nbytes = sizeof(out_buf);
            error = rast_encoding_converter_get_next(converter, out_buf,
                                                     &out_buf_nbytes);
            if (error != RAST_OK) {
                goto cleanup;
            }

            error = extract_html(filter, &next_brigade, out_buf,
                                 out_buf_nbytes, &have_title,
                                 candidate_encodings[0],
                                 bucket_alloc, pool);
            if (error != RAST_OK) {
                goto cleanup;
            }
        } while (!rast_encoding_converter_is_done(converter));
    }

    /* Pass on text that was collected but never followed by an EOS. */
    if (next_brigade != NULL && !APR_BRIGADE_EMPTY(next_brigade)) {
        error = rast_mime_filter_pass(filter, next_brigade, "text/plain",
                                      NULL);
    }

 cleanup:
    apr_pool_destroy(pool);
    return error;
}

/*
 * Module initialisation hook: hands a filter module whose invoke callback
 * is html_filter_invoke to rast_filter_map_add_mime_filter() for the MIME
 * type "text/x-rast-test-html".
 *
 * A failure to add the filter is discarded; this function always returns
 * RAST_OK.
 */
rast_error_t *
rast_html_filter_module_initialize(rast_filter_map_t *map)
{
    static rast_filter_module_t filter_module = {
        SUPPORTED_FRAMEWORK_VERSION,
        NULL,
        html_filter_invoke,
    };
    rast_error_t *add_error;

    add_error = rast_filter_map_add_mime_filter(map, "text/x-rast-test-html",
                                                &filter_module);
    if (add_error != RAST_OK) {
        /* todo: error handling */
        rast_error_destroy(add_error);
    }

    return RAST_OK;
}
