/* 
 * Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
 *
 * This file is part of Rast.
 * See the file COPYING for redistribution information.
 *
 */

#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include <apr_errno.h>
#include <apr_strings.h>
#include <apr_hash.h>
#include <apr_file_info.h>
#include <apr_dso.h>

#include "rast/config.h"
#include "rast/encoding.h"

/* Pool from which the module registry and everything stored in it are
 * allocated; created by rast_load_encoding_modules() and destroyed by
 * rast_unload_encoding_modules(). */
static apr_pool_t *encoding_module_pool;
/* Registry mapping a module name to its encoding_module_info_t; NULL until
 * rast_load_encoding_modules() has been called. */
static apr_hash_t *encoding_modules;

/* One entry of the module registry: a loaded shared object together with
 * the module descriptor symbol resolved from it. */
typedef struct {
    /* handle returned by apr_dso_load(); passed to apr_dso_unload() later */
    apr_dso_handle_t *dso_handle;
    /* address of the "rast_encoding_<name>" symbol inside the DSO */
    rast_encoding_module_t *module;
} encoding_module_info_t;

/*
 * Scan a directory for shared objects and register each one as an encoding
 * module.
 *
 * Every regular file in dirname whose name ends in SHREXT is opened with
 * apr_dso_load().  The file name with SHREXT stripped becomes the module
 * name, and the module descriptor is resolved as the symbol
 * "rast_encoding_<module name>".  Files that cannot be opened as a DSO are
 * skipped; a DSO that lacks the expected symbol is unloaded and aborts the
 * scan.
 *
 * dirname: directory to scan; entries are addressed as dirname "/" name.
 *
 * Returns a RAST_ERROR_GENERAL error when modules are already loaded, the
 * converted APR status of a failing directory or symbol call, or otherwise
 * the converted status of closing the directory.  When an error is returned
 * after the registry was created, the registry (and any module registered
 * so far) is left in place; rast_unload_encoding_modules() releases it.
 */
rast_error_t *
rast_load_encoding_modules(const char *dirname)
{
    apr_status_t status;
    apr_dir_t *dir;
    apr_finfo_t finfo;
    const size_t shrext_len = strlen(SHREXT);

    if (encoding_modules != NULL) {
        return rast_error(RAST_ERROR_GENERAL,
                          "encoding modules are already loaded");
    }
    apr_pool_create(&encoding_module_pool, rast_get_global_pool());
    encoding_modules = apr_hash_make(encoding_module_pool);
    status = apr_dir_open(&dir, dirname, encoding_module_pool);
    if (status != APR_SUCCESS) {
        return apr_status_to_rast_error(status);
    }
    for (;;) {
        char *path, *var_name;
        const char *module_name;
        size_t name_len;
        apr_dso_handle_t *handle;
        apr_dso_handle_sym_t sym;
        encoding_module_info_t *module_info;

        status = apr_dir_read(&finfo, APR_FINFO_TYPE | APR_FINFO_NAME, dir);
        /* Use the status macro rather than a direct comparison: the
         * end-of-directory code is platform specific. */
        if (APR_STATUS_IS_ENOENT(status)) {
            break;
        }
        /* The entry was read but not all requested fields could be filled
         * in; skip it and keep scanning. */
        if (APR_STATUS_IS_INCOMPLETE(status)) {
            continue;
        }
        /* Any other failure is a real read error.  Retrying would risk
         * looping forever on a persistent error, so report it instead. */
        if (status != APR_SUCCESS) {
            apr_dir_close(dir);
            return apr_status_to_rast_error(status);
        }
        if (finfo.filetype != APR_REG) {
            continue;
        }
        /* Only names strictly longer than SHREXT and ending in it qualify,
         * so the derived module name is never empty. */
        name_len = strlen(finfo.name);
        if (name_len <= shrext_len ||
            strcmp(finfo.name + name_len - shrext_len, SHREXT) != 0) {
            continue;
        }
        path = apr_pstrcat(encoding_module_pool,
                           dirname, "/", finfo.name, NULL);
        status = apr_dso_load(&handle, path, encoding_module_pool);
        if (status != APR_SUCCESS) {
            continue;
        }
        module_name = apr_pstrndup(encoding_module_pool,
                                   finfo.name, name_len - shrext_len);
        var_name = apr_pstrcat(encoding_module_pool,
                               "rast_encoding_", module_name, NULL);
        status = apr_dso_sym(&sym, handle, var_name);
        if (status != APR_SUCCESS) {
            apr_dso_unload(handle);
            apr_dir_close(dir);
            return apr_status_to_rast_error(status);
        }
        module_info = (encoding_module_info_t *)
            apr_palloc(encoding_module_pool, sizeof(*module_info));
        module_info->dso_handle = handle;
        module_info->module = (rast_encoding_module_t *) sym;
        /* module_name lives in encoding_module_pool, so it can serve as the
         * hash key for as long as the registry exists. */
        apr_hash_set(encoding_modules,
                     module_name, strlen(module_name), module_info);
    }
    status = apr_dir_close(dir);
    return apr_status_to_rast_error(status);
}

rast_error_t *
rast_unload_encoding_modules()
{
    apr_hash_index_t *hi;
    apr_status_t status;
    rast_error_t *error = RAST_OK;

    for (hi = apr_hash_first(encoding_module_pool, encoding_modules); hi;
         hi = apr_hash_next(hi)) {
        const void *key;
        apr_ssize_t key_nbytes;
        void *val;
        encoding_module_info_t *module_info;

        apr_hash_this(hi, &key, &key_nbytes, &val);
        module_info = (encoding_module_info_t *) val;
        status = apr_dso_unload(module_info->dso_handle);
        if (status != APR_SUCCESS) {
            error = apr_status_to_rast_error(status);
        }
    }
    apr_pool_destroy(encoding_module_pool);
    encoding_module_pool = NULL;
    return error;
}

/*
 * Look up a loaded encoding module by name.
 *
 * name:            module name used as the registry key.
 * encoding_module: receives the module descriptor on success; left
 *                  untouched on failure.
 *
 * Returns RAST_OK on success, or a RAST_ERROR_GENERAL error when the
 * registry has not been created or holds no entry for name.
 */
rast_error_t *
rast_get_encoding_module(const char *name,
                         rast_encoding_module_t **encoding_module)
{
    encoding_module_info_t *entry;

    if (encoding_modules == NULL) {
        return rast_error(RAST_ERROR_GENERAL,
                          "encoding modules are not loaded yet");
    }

    entry = (encoding_module_info_t *)
        apr_hash_get(encoding_modules, name, strlen(name));
    if (entry != NULL) {
        *encoding_module = entry->module;
        return RAST_OK;
    }

    return rast_error(RAST_ERROR_GENERAL,
                      "not supported encoding: %s", name);
}

/*
 * Normalize a text buffer with the given encoding module.
 *
 * Runs the module's normalize_text callback on s, then feeds its output to
 * the normalize_chars callback.  The first callback is handed a temporary
 * sub-pool of pool that is destroyed before returning; the second callback
 * is handed pool itself.
 *
 * new_nbytes: if not NULL, receives the byte length reported by
 *             normalize_chars.
 *
 * Returns the buffer produced by normalize_chars.
 */
char *
rast_normalize_text(rast_encoding_module_t *encoding_module,
                    const char *s, rast_size_t nbytes,
                    rast_size_t *new_nbytes, apr_pool_t *pool)
{
    apr_pool_t *scratch;
    char *intermediate;
    char *normalized;
    rast_size_t intermediate_nbytes;
    rast_size_t normalized_nbytes;

    apr_pool_create(&scratch, pool);

    /* first pass: output goes to the scratch pool */
    encoding_module->normalize_text(scratch, s, nbytes,
                                    &intermediate, &intermediate_nbytes);
    /* second pass: output goes to the caller's pool */
    encoding_module->normalize_chars(pool, intermediate, intermediate_nbytes,
                                     &normalized, &normalized_nbytes);

    apr_pool_destroy(scratch);

    if (new_nbytes) {
        *new_nbytes = normalized_nbytes;
    }
    return normalized;
}

/*
 * Allocate a tokenizer from pool, positioned at the start of the nbytes
 * bytes beginning at s.  The position counter starts at 0 and the context
 * pointer at NULL.
 */
static inline rast_tokenizer_t *
tokenizer_create(apr_pool_t *pool,
                 rast_encoding_module_t *encoding_module,
                 const char *s, rast_size_t nbytes)
{
    rast_tokenizer_t *t;

    t = (rast_tokenizer_t *) apr_palloc(pool, sizeof(rast_tokenizer_t));

    t->pool = pool;
    t->encoding_module = encoding_module;
    t->context = NULL;

    /* input window: [ptr, ptr_end) */
    t->ptr = s;
    t->ptr_end = s + nbytes;
    t->pos = 0;

    return t;
}

/*
 * Fill token for the tokenizer's current position: copy the current
 * pointer and position into it, then let the module's get_token callback
 * complete it.  Returns whatever the callback returns.
 */
static inline rast_error_t *
tokenizer_get_token(rast_tokenizer_t *tokenizer, rast_token_t *token)
{
    token->ptr = tokenizer->ptr;
    token->pos = tokenizer->pos;

    /* the callback's result is RAST_OK on success, so it can be passed
     * straight through */
    return tokenizer->encoding_module->get_token(tokenizer, token);
}

/* Return nonzero once the current pointer has reached the end of input. */
static inline int
tokenizer_is_done(rast_tokenizer_t *tokenizer)
{
    return !(tokenizer->ptr < tokenizer->ptr_end);
}

/*
 * Create a character tokenizer over the nbytes bytes at s, allocated from
 * pool.
 */
rast_tokenizer_t *
rast_char_tokenizer_create(apr_pool_t *pool,
                           rast_encoding_module_t *encoding_module,
                           const char *s, rast_size_t nbytes)
{
    rast_tokenizer_t *char_tokenizer;

    char_tokenizer = tokenizer_create(pool, encoding_module, s, nbytes);
    return char_tokenizer;
}

/*
 * Store the byte length of the character at the current position in
 * *char_len.  At or past the end of input the length is 0 and the module
 * is not consulted; otherwise the module's get_char_len callback decides.
 */
static rast_error_t *
get_char_len(rast_tokenizer_t *tokenizer, rast_size_t *char_len)
{
    if (tokenizer->ptr < tokenizer->ptr_end) {
        return tokenizer->encoding_module->get_char_len(tokenizer, char_len);
    }

    *char_len = 0;
    return RAST_OK;
}

/*
 * Advance the tokenizer past the character at the current position.
 * On error the position is left unchanged and the error is returned.
 */
rast_error_t *
rast_char_tokenizer_next(rast_tokenizer_t *tokenizer)
{
    rast_size_t step;
    rast_error_t *error = get_char_len(tokenizer, &step);

    if (error == RAST_OK) {
        tokenizer->ptr += step;
    }
    return error;
}

/*
 * Describe the character at the current position in *ch: its encoding
 * module, start pointer and byte length.  On error *ch is left untouched
 * and the error is returned.
 */
rast_error_t *
rast_char_tokenizer_get_current(rast_tokenizer_t *tokenizer, rast_char_t *ch)
{
    rast_size_t char_nbytes;
    rast_error_t *error = get_char_len(tokenizer, &char_nbytes);

    if (error == RAST_OK) {
        ch->encoding_module = tokenizer->encoding_module;
        ch->ptr = tokenizer->ptr;
        ch->nbytes = char_nbytes;
    }
    return error;
}

/* Return nonzero when the character tokenizer has consumed all input. */
int
rast_char_tokenizer_is_done(rast_tokenizer_t *tokenizer)
{
    int done = tokenizer_is_done(tokenizer);

    return done;
}

/*
 * Create a register tokenizer over the nbytes bytes at s, allocated from
 * pool.
 */
rast_tokenizer_t *
rast_register_tokenizer_create(apr_pool_t *pool,
                               rast_encoding_module_t *encoding_module,
                               const char *s, rast_size_t nbytes)
{
    rast_tokenizer_t *register_tokenizer;

    register_tokenizer = tokenizer_create(pool, encoding_module, s, nbytes);
    return register_tokenizer;
}

/*
 * Advance the register tokenizer by the byte and character offsets that
 * the module's get_next_offset callback reports.  On error neither the
 * pointer nor the position is changed and the error is returned.
 */
rast_error_t *
rast_register_tokenizer_next(rast_tokenizer_t *tokenizer)
{
    rast_size_t nbytes_step, nchars_step;
    rast_error_t *error;

    error = tokenizer->encoding_module->get_next_offset(tokenizer,
                                                        &nbytes_step,
                                                        &nchars_step);
    if (error == RAST_OK) {
        tokenizer->ptr += nbytes_step;
        tokenizer->pos += nchars_step;
    }
    return error;
}

/*
 * Fill *token with the token at the register tokenizer's current
 * position.  Returns the result of the shared token helper.
 */
rast_error_t *
rast_register_tokenizer_get_current(rast_tokenizer_t *tokenizer,
                                    rast_token_t *token)
{
    rast_error_t *error = tokenizer_get_token(tokenizer, token);

    return error;
}

/* Return nonzero when the register tokenizer has consumed all input. */
int
rast_register_tokenizer_is_done(rast_tokenizer_t *tokenizer)
{
    int done = tokenizer_is_done(tokenizer);

    return done;
}

/*
 * Create a search tokenizer over the nbytes bytes at s, allocated from
 * pool.
 */
rast_tokenizer_t *
rast_search_tokenizer_create(apr_pool_t *pool,
                             rast_encoding_module_t *encoding_module,
                             const char *s, rast_size_t nbytes)
{
    rast_tokenizer_t *search_tokenizer;

    search_tokenizer = tokenizer_create(pool, encoding_module, s, nbytes);
    return search_tokenizer;
}

/*
 * Advance the search tokenizer.
 *
 * The module is asked first for the offsets to the next position and then
 * for the token at the current position.  If that token reaches or passes
 * the end of input, the pointer jumps straight to the end so the tokenizer
 * reports done; otherwise pointer and position move by the reported
 * offsets.  An error from either callback is returned with the tokenizer
 * position unchanged.
 */
rast_error_t *
rast_search_tokenizer_next(rast_tokenizer_t *tokenizer)
{
    rast_size_t nbytes_step, nchars_step;
    rast_token_t current;
    rast_error_t *error;

    /* keep the callback order: offsets first, then the token */
    error = tokenizer->encoding_module->get_next_offset(tokenizer,
                                                        &nbytes_step,
                                                        &nchars_step);
    if (error != RAST_OK) {
        return error;
    }
    error = tokenizer->encoding_module->get_token(tokenizer, &current);
    if (error != RAST_OK) {
        return error;
    }

    if (tokenizer->ptr + current.nbytes < tokenizer->ptr_end) {
        tokenizer->ptr += nbytes_step;
        tokenizer->pos += nchars_step;
    }
    else {
        /* the current token is the last one; position is not updated */
        tokenizer->ptr = tokenizer->ptr_end;
    }
    return RAST_OK;
}

/*
 * Fill *token with the token at the search tokenizer's current position.
 * Returns the result of the shared token helper.
 */
rast_error_t *
rast_search_tokenizer_get_current(rast_tokenizer_t *tokenizer,
                                  rast_token_t *token)
{
    rast_error_t *error = tokenizer_get_token(tokenizer, token);

    return error;
}

/* Return nonzero when the search tokenizer has consumed all input. */
int
rast_search_tokenizer_is_done(rast_tokenizer_t *tokenizer)
{
    int done = tokenizer_is_done(tokenizer);

    return done;
}

/*
 * Count the characters in the nbytes bytes at s, as delimited by the
 * encoding module's character tokenizer.
 *
 * pool: pool from which the temporary tokenizer is allocated.
 *
 * Returns the number of characters stepped over.  Counting stops early if
 * the tokenizer reports an error or fails to advance, in which case the
 * count covers only the characters successfully consumed before that point.
 */
int
rast_count_chars(rast_encoding_module_t *encoding_module,
                 const char *s, rast_size_t nbytes, apr_pool_t *pool)
{
    int count = 0;
    rast_tokenizer_t *tokenizer;

    tokenizer = rast_char_tokenizer_create(pool, encoding_module, s, nbytes);
    while (!rast_char_tokenizer_is_done(tokenizer)) {
        const char *before = tokenizer->ptr;
        rast_error_t *error = rast_char_tokenizer_next(tokenizer);

        /* A failed step leaves the position where it was, and so does a
         * zero-length character; either way another iteration could never
         * reach the end of input, so stop instead of looping forever.
         * NOTE(review): the error object is dropped here because the int
         * return value cannot carry it -- release it if the error API
         * requires that; confirm. */
        if (error != RAST_OK || tokenizer->ptr == before) {
            break;
        }
        count++;
    }
    return count;
}

/*
 * Tell whether ch is a whitespace character.  The encoding module's
 * is_space callback decides when it is provided; otherwise the first byte
 * of the character is tested with isspace().
 */
int
rast_char_is_space(rast_char_t *ch)
{
    if (ch->encoding_module->is_space != NULL) {
        return ch->encoding_module->is_space(ch);
    }

    /* no module hook: fall back to the C library on the leading byte */
    return isspace((unsigned char) *ch->ptr);
}

/* vim: set filetype=c sw=4 expandtab : */
