/** @file
 */
#if defined(HAVE_CONFIG_H)
#  include "../../config.h"
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#define GNU_SOURCE
#include <cstring>
#include <cerrno>
#include <cstdlib>
#include <string>

#include <glib/gmessages.h>

#include "hyperestraier/hyperestraier.hpp"
#include "text_filter.hpp"


namespace gdestraier {
  namespace builder {
    namespace filter {

      /** @brief Constructs the plain text filter; there is nothing to initialise here. */
      from_text::from_text() {}


      /** @brief Destroys the plain text filter; there is nothing to release here. */
      from_text::~from_text() {}


      /** @brief File extension -> MIME type table handed to the factory of this filter.
       *
       * Each entry pairs a file name extension (without the leading dot) with
       * the MIME type reported for it. The table ends with a { 0, 0 } entry;
       * no element count is passed to the factory, so that entry presumably
       * acts as the terminator -- confirm against the factory implementation.
       */
      factory::extention_map_type from_text::extentions_[] = {
        // Plain text
        { "txt", "text/plain" },
        { "text", "text/plain" },
        { "asc", "text/plain" },
        { "ascii", "text/plain" },
        // C / C++ sources and headers
        { "c", "text/x-csrc" },
        { "h", "text/x-chdr" },
        { "c++", "text/x-c++src" },
        { "cpp", "text/x-c++src" },
        { "cxx", "text/x-c++src" },
        { "cc", "text/x-c++src" },
        { "h++", "text/x-c++hdr" },
        { "hpp", "text/x-c++hdr" },
        { "hxx", "text/x-c++hdr" },
        { "hh", "text/x-c++hdr" },
        // Java
        { "java", "text/x-java" },
        // PHP
        { "phtml", "application/x-httpd-php" },
        { "pht", "application/x-httpd-php" },
        { "php", "application/x-httpd-php" },
        { "phps", "application/x-httpd-php-source" },
        { "php3", "application/x-httpd-php3" },
        { "php3p", "application/x-httpd-php3-preprocessed" },
        // Was "application/x-httpd/php4": a MIME type has exactly one '/',
        // and the sibling entries all use the "x-httpd-php*" spelling.
        { "php4", "application/x-httpd-php4" },
        // JavaScript
        { "js", "application/x-javascript" },
        // Perl
        { "pl", "text/x-perl" },
        { "pm", "text/x-perl" },
        { 0, 0 }
      };

      /** @brief Returns the factory describing this filter.
       *
       * The factory is a function-local static, so it is constructed on the
       * first call and the same object is returned on every later call. It is
       * built from the display name "Plain text", the create() function and
       * the extension table of this class.
       */
      factory const& from_text::get_factory()
      {
        static factory text_factory("Plain text",
                                    &from_text::create,
                                    from_text::extentions_);
        return text_factory;
      }

      /** @brief Allocates a new from_text filter with new and returns it as an abstract_filter pointer. */
      abstract_filter* from_text::create()
      {
        return new from_text();
      }


      /** @brief Fills a document from the content of a text file.
       *
       * Creates the document, records its MIME type (falling back to
       * "text/plain" when mime_type is null), loads the file content, records
       * the detected encoding in the "encoding" attribute, converts the
       * content to UTF-8 when the detected encoding is not UTF-8 compatible,
       * and finally adds the text line by line through parse_text().
       *
       * @param doc       Document to fill.
       * @param index     Index whose fathom_encoding() is used to detect the encoding.
       * @param uri       Location of the file, passed to load_file_content().
       * @param text_ur   Not used by this implementation.
       * @param info      File information; only its size member is read.
       * @param mime_type MIME type to record, or null for "text/plain".
       * @return false only when load_file_content() returns null; true in
       *         every other case, including an empty file and a failed
       *         encoding conversion (the document then carries no text).
       */
      bool from_text::operator() (hyperestraier::local_document* doc,
                                  gdestraier::model::index_type const& index,
                                  ::GnomeVFSURI* uri,
                                  char const* text_ur,
                                 ::GnomeVFSFileInfo* info,
                                  char const* mime_type) const
      {
        doc->create();
        doc->set_attr(ESTDATTRTYPE, (mime_type? mime_type : "text/plain"));

        // An empty file yields a document that carries attributes only.
        if (info->size == 0)
          return true;

        ::GnomeVFSFileSize length;
        void* buffer = load_file_content(uri, info->size, &length);
        if (buffer == 0)
          return false;

        // Detect the character encoding from the raw bytes and record it.
        char const* const raw_begin = static_cast<char const*>(buffer);
        gdestraier::model::encoding const* detected = index.fathom_encoding(raw_begin, raw_begin + length);
        doc->set_attr("encoding", detected->id_);

        if (detected->is_utf8_compatible_) {
          // Already UTF-8 compatible: the loaded bytes can be parsed directly.
          parse_text(static_cast<char*>(buffer), std::size_t(length), index, doc);
          std::free(buffer);
          return true;
        }

        // Not a UTF-8 compatible encoding, so convert the content to UTF-8.
        int converted_length = 0;
        char* converted = ::est_iconv(raw_begin, length, detected->id_, "UTF-8", &converted_length, 0);
        std::free(buffer);

        // The conversion failed; still register the document, just without content.
        if (converted == 0)
          return true;

        length = converted_length;
        parse_text(converted, std::size_t(length), index, doc);
        std::free(converted);

        return true;
      }



      /** @brief Adds the text to the document object one line at a time.
       *
       * A line ends at '\n', '\r', '\0' or at the end of the buffer. Empty and
       * whitespace-only lines are skipped.
       *
       * NOTE:
       *   - Leading and trailing whitespace (' ' and '\t') of each line is removed.
       *   - The contents of text are modified: a '\0' terminator is written
       *     in place after the last non-blank character of each line.
       *   - Only bytes inside [text, text + len) are ever written. A final
       *     line that reaches the very last byte of the buffer has no room
       *     for a terminator, so it is copied into a temporary string instead
       *     of writing one byte past the end.
       *
       * @param text  Start of the text buffer; need not be NUL terminated.
       * @param len   Number of valid bytes in text.
       * @param index Not used by this implementation.
       * @param doc   Document that receives each line through add_text().
       * @return Always true.
       */
      bool
      from_text::parse_text(char* text,
                            std::size_t len,
                            gdestraier::model::index_type const& index,
                            hyperestraier::local_document* doc) const
      {
        char* line = text;
        char const* const text_tail = text + len;

        while (line != text_tail) {
          // Skip leading whitespace, line breaks and NULs (including the
          // terminators written by earlier iterations).
          while (line != text_tail && (*line == '\0' || *line == ' ' || *line == '\t' || *line == '\n' || *line == '\r') )
            line++;
          if (line == text_tail) return true;

          // Search for the end of the line.
          char* tail = line; // scans forward to the line terminator
          char* last = line; // last non-blank character seen so far
          while (tail != text_tail && *tail != '\0' && *tail != '\n' && *tail != '\r') {
            if (*tail != ' ' && *tail != '\t') last = tail;
            tail++;
          }

          char* const line_end = last + 1; // one past the last non-blank character
          if (line_end != text_tail) {
            // The terminator lands on a trailing blank or on the line break,
            // both of which lie inside the buffer.
            *line_end = '\0';
            doc->add_text(line, false);
          } else {
            // The line runs up to the last byte of the buffer: terminating it
            // in place would write to text[len], so hand over a terminated copy.
            std::string const final_line(line, static_cast<std::string::size_type>(line_end - line));
            doc->add_text(final_line.c_str(), false);
          }

          line = tail;
        }
        return true;
      }
    }
  }
}
