/***********************************************************************************

    Copyright (C) 2007-2019 Ahmet Öztürk (aoz_2@yahoo.com)

    This file is part of Lifeograph.

    Lifeograph is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Lifeograph is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Lifeograph.  If not, see <http://www.gnu.org/licenses/>.

***********************************************************************************/


#include "entry_parser.hpp"
#include "diarydata.hpp"


using namespace LIFEO;

// TEXT FORMATTING
// Each table below is an ordered list of steps. A step is a set of character-class flags,
// optionally paired with an EntryParser member function (the "applier") that
// Recipe::process_char() invokes when the current character matches that step.
// NOTE(review): CS_MULTIPLE presumably marks a step that may match a run of characters;
// its exact handling lives in cmp_chars()/the header, which are not visible here.
EntryParser::Recipe::Contents
    // newline, space, then any non-newline character; junction_subheading() picks between
    // sub-heading and sub-sub-heading based on that last character
    EntryParser::m_rc_subheading =
    { CC_NEWLINE,
      { CC_SPACE, &EntryParser::set_start },
      { CM_ANY_BUT_NEWLINE, &EntryParser::junction_subheading } },

    // a blank followed by one of the markup characters (* _ # =); junction_markup() then
    // re-targets the running recipe to the matching *_end table below
    EntryParser::m_rc_markup =
    { CM_BLANK,
      { CA_MARKUP, &EntryParser::junction_markup } },

    // bold: text on the same line closed by '*'
    EntryParser::m_rc_markup_b_end =
    { { CM_ANY_BUT_NEWLINE|CS_MULTIPLE, &EntryParser::junction_markup2 },
      { CC_ASTERISK, &EntryParser::apply_bold } },

    // italic: text on the same line closed by '_'
    EntryParser::m_rc_markup_i_end =
    { { CM_ANY_BUT_NEWLINE|CS_MULTIPLE, &EntryParser::junction_markup2 },
      { CC_UNDERSCORE, &EntryParser::apply_italic } },

    // highlight: text on the same line closed by '#'
    EntryParser::m_rc_markup_h_end =
    { { CM_ANY_BUT_NEWLINE|CS_MULTIPLE, &EntryParser::junction_markup2 },
      { CC_HASH, &EntryParser::apply_highlight } },

    // strikethrough: text on the same line closed by '='
    EntryParser::m_rc_markup_s_end =
    { { CM_ANY_BUT_NEWLINE|CS_MULTIPLE, &EntryParser::junction_markup2 },
      { CC_EQUALS, &EntryParser::apply_strikethrough } },

    // comment: "[[" ... "]]" on a single line
    EntryParser::m_rc_comment =
    { { CC_SBB, &EntryParser::set_start },
      CC_SBB,
      CM_ANY_BUT_NEWLINE|CS_MULTIPLE,
      CC_SBE,
      { CC_SBE, &EntryParser::apply_comment } },

    // ignored line: newline, '.', tab, rest of the line, newline;
    // add_block() raises RS_BLOCK on the recipe while the line body is consumed
    EntryParser::m_rc_ignore =
    { CC_NEWLINE,
      { CC_DOT, &EntryParser::set_start },
      CC_TAB,
      { CM_ANY_BUT_NEWLINE|CS_MULTIPLE, &EntryParser::add_block },
      { CC_NEWLINE, &EntryParser::apply_ignore } },

    // to-do item: newline, tab(s), '[', a status character, ']', space;
    // junction_todo() dispatches on the status character
    EntryParser::m_rc_todo =
    { CC_NEWLINE,
      CC_TAB|CS_MULTIPLE,
      { CC_SBB, &EntryParser::set_start },
      CA_TODO_STATUS,
      CC_SBE,
      { CC_SPACE, &EntryParser::junction_todo } },

    // indented paragraph: newline, tab(s), then a non-space character
    EntryParser::m_rc_indent =
    { { CC_NEWLINE, &EntryParser::set_start },
      CC_TAB|CS_MULTIPLE,
      { CM_NONSPACE, &EntryParser::apply_indent } };

// LINK
// Step tables for dates and links. m_rc_date, m_rc_colon and m_rc_at are registered as
// trigger recipes by the constructor; the remaining tables are started on demand by the
// junction_*() functions further down in this file.
EntryParser::Recipe::Contents
    // date: a blank or punctuation, 4 digits, separator, 2 digits, separator, 2 digits, blank;
    // the junctions validate year and month, check_date() validates the complete date
    EntryParser::m_rc_date  =
    { CM_BLANK|CF_PUNCTUATION,
      { CC_NUMBER, &EntryParser::set_start },
      CC_NUMBER,
      CC_NUMBER,
      CC_NUMBER,
      { CA_DATE_SEPARATOR, &EntryParser::junction_date_dotym },
      CC_NUMBER,
      CC_NUMBER,
      { CA_DATE_SEPARATOR, &EntryParser::junction_date_dotmd },
      CC_NUMBER,
      CC_NUMBER,
      { CM_BLANK, &EntryParser::check_date } },

    // every ':' calls junction_colon(), which inspects the word preceding it
    EntryParser::m_rc_colon =
    { { CC_COLON, &EntryParser::junction_colon } },

    // every '@' calls junction_at(), which starts the e-mail tail recipe
    EntryParser::m_rc_at =
    { { CC_AT, &EntryParser::junction_at } },

    // after "scheme:" -> "//", a run of non-space characters, then a blank
    EntryParser::m_rc_link_file =
    { CC_SLASH,
      CC_SLASH,
      CM_NONSPACE|CS_MULTIPLE,
      { CM_BLANK, &EntryParser::junction_link } },

    // after "mailto:" -> local part, '@', domain containing a '.', then a blank
    EntryParser::m_rc_link_email =
    { CM_NONSPACE|CS_MULTIPLE,
      CC_AT,
      CM_NONSPACE|CS_MULTIPLE,
      CC_DOT,
      CM_NONSPACE|CS_MULTIPLE,
      { CM_BLANK, &EntryParser::junction_link } },

    // after "deid:" -> at least four digits, then a blank
    EntryParser::m_rc_link_id =
    { CF_NUMBER,
      CF_NUMBER,
      CF_NUMBER,
      CF_NUMBER|CS_MULTIPLE,
      { CM_BLANK, &EntryParser::junction_link } },

    // after "chart:" at the beginning of a line -> rest of the line up to the newline
    EntryParser::m_rc_chart =
    { CM_ANY_BUT_NEWLINE|CS_MULTIPLE,
      { CC_NEWLINE, &EntryParser::apply_chart } },

    // tail of a "<target" link: a tab (position stored by set_middle()), the text on the
    // same line, and the closing '>'
    EntryParser::m_rc_link_end =
    { { CC_TAB, &EntryParser::set_middle },
      CM_ANY_BUT_NEWLINE|CS_MULTIPLE,
      { CC_MORE, &EntryParser::apply_link_hidden } },

    // after '@' -> domain containing a '.', then a blank
    EntryParser::m_rc_email_end =
    { CM_NONSPACE|CS_MULTIPLE,
      CC_DOT,
      CM_NONSPACE|CS_MULTIPLE,
      { CM_BLANK, &EntryParser::junction_link } };


// Registers the trigger recipes. EntryParser::process_char() restarts every recipe in
// m_all_recipes on each character and copies those that begin matching into
// m_active_recipes.
// NOTE(review): the Recipe constructor is not visible here; judging by the other call
// sites in this file the arguments are presumably { id, parent, contents, blocked ids }.
EntryParser::EntryParser()
{
    m_all_recipes.insert( new Recipe{ RID_SUBHEADING, this, &m_rc_subheading, 0 } );
    m_all_recipes.insert( new Recipe{ RID_MARKUP, this, &m_rc_markup, 0 } );
    m_all_recipes.insert( new Recipe{ RID_COMMENT, this, &m_rc_comment, 0 } );
    m_all_recipes.insert( new Recipe{ RID_IGNORE, this, &m_rc_ignore, RID_ALL } );
    m_all_recipes.insert( new Recipe{ RID_TODO, this, &m_rc_todo, 0 } );
    m_all_recipes.insert( new Recipe{ RID_DATE, this, &m_rc_date, 0 } );
    m_all_recipes.insert( new Recipe{ RID_COLON, this, &m_rc_colon, 0 } );
    m_all_recipes.insert( new Recipe{ RID_AT, this, &m_rc_at, 0 } );
    // NOTE(review): the indent recipe is registered under RID_AT, the same id as the '@'
    // recipe above, so anything that blocks RID_AT also blocks indent detection. This
    // looks like a copy/paste slip -- confirm whether a dedicated id was intended.
    m_all_recipes.insert( new Recipe{ RID_AT, this, &m_rc_indent, 0 } );
}

// Prepares the parser for a fresh pass over [start, end): rewinds the positions, clears
// the per-pass state and discards recipes left over from the previous pass.
void
EntryParser::reset( Ustring::size_type start, Ustring::size_type end )
{
    // positions
    m_pos_end = end;
    m_pos_blank = start;
    m_pos_curr = m_pos_blank;

    // character class tracking
    m_cf_curr = CC_NOTHING;
    m_cf_last = CC_NOT_SET;

    // word and number accumulators
    m_word_curr.clear();
    m_word_count = 0;
    m_int_last = 0;
    m_date_last = 0;

    // a pass that begins at the very top of the text also covers the heading
    if( start == 0 )
        apply_heading( end > 0 );

    // recipes in the active list are owned by the parser
    for( auto stale_recipe : m_active_recipes )
        delete stale_recipe;

    m_active_recipes.clear();
}

// Installs the text that parse() looks for while scanning and rewinds the match cursor.
// i_search_end is the index of the last character of the search text; parse() only uses
// it when the search text is not empty.
void
EntryParser::set_search_str( const Ustring& str )
{
    i_search = 0;
    i_search_end = str.size() - 1;
    m_search_str = str;
}

// Scans the text in [start, end) one character at a time. For every character it
//   1. advances the incremental search for m_search_str and calls apply_match() whenever
//      a complete occurrence ends on the current character (m_pos_search then holds the
//      position where that occurrence begins);
//   2. classifies the character into m_cf_curr and feeds it to process_char(), which
//      drives the markup recipes.
// When end > 0, a virtual newline is processed after the last character so that recipes
// waiting for an end of line can complete.
void
EntryParser::parse( Ustring::size_type start, Ustring::size_type end )
{
    reset( start, end );

    for( ; m_pos_curr < m_pos_end; ++m_pos_curr )
    {
        m_char_last = m_char_curr;
        m_char_curr = get_char_at( m_pos_curr );

        // SEARCH MATCHING
        if( !m_search_str.empty() )
        {
            const auto char_folded = char_lower( m_char_curr );

            if( m_search_str[ i_search ] == char_folded )
            {
                if( i_search == 0 )
                    m_pos_search = m_pos_curr;
                if( i_search == i_search_end )
                {
                    apply_match();
                    i_search = 0;
                }
                else
                    i_search++;
            }
            else if( i_search > 0 )
            {
                // A partial match broke on this character. Restarting from scratch would
                // miss occurrences that overlap the failed attempt (e.g. "ab" in "aab"),
                // so fall back to the longest prefix of the search text that is also a
                // suffix of what has been read so far, current character included.
                // The text read so far equals the first `matched` search characters.
                const auto matched = i_search;
                i_search = 0;

                for( auto len = matched; len > 0; --len )
                {
                    // the candidate prefix must end with the current character
                    if( !( m_search_str[ len - 1 ] == char_folded ) )
                        continue;

                    // ...and its first len - 1 characters must equal the tail of the
                    // previously matched part
                    const auto shift = matched - ( len - 1 );
                    bool is_border = true;
                    for( auto j = shift; j < matched; ++j )
                    {
                        if( m_search_str[ j - shift ] != m_search_str[ j ] )
                        {
                            is_border = false;
                            break;
                        }
                    }

                    if( is_border )
                    {
                        i_search = len;
                        m_pos_search = m_pos_curr - ( len - 1 );
                        break;
                    }
                }
            }
        }

        // MARKUP PARSING
        // the switch only classifies the character; process_char() runs once afterwards
        switch( m_char_curr )
        {
            case 0:     // should never be the case
            case '\n':
            case '\r':
                m_cf_curr = CC_NEWLINE|CF_NEWLINE;
                break;
            case ' ':
                m_cf_curr = CC_SPACE|CF_SPACE|CA_TODO_STATUS;
                break;
            case '*': // SIGN
                m_cf_curr = CC_ASTERISK|CF_PUNCTUATION|CA_MARKUP;
                break;
            case '_': // SIGN
                m_cf_curr = CC_UNDERSCORE|CF_PUNCTUATION|CA_MARKUP;
                break;
            case '=': // SIGN
                m_cf_curr = CC_EQUALS|CF_PUNCTUATION|CA_MARKUP;
                break;
            case '#': // SIGN
                m_cf_curr = CC_HASH|CF_PUNCTUATION|CA_MARKUP;
                break;
            case '[': // SIGN
                m_cf_curr = CC_SBB|CF_PUNCTUATION;
                break;
            case ']': // SIGN
                m_cf_curr = CC_SBE|CF_PUNCTUATION;
                break;
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                m_cf_curr = CC_NUMBER|CF_NUMBER;
                handle_number();   // calculates numeric value
                break;
            case '.': // SIGN
                m_cf_curr = CC_DOT|CF_PUNCTUATION|CA_DATE_SEPARATOR;
                break;
            case '-': // SIGN - CF_SIGNSPELL does not seem to be necessary
                m_cf_curr = CC_DASH|CF_PUNCTUATION|CA_DATE_SEPARATOR;
                break;
            case '/': // SIGN
                m_cf_curr = CC_SLASH|CF_PUNCTUATION|CA_DATE_SEPARATOR;
                break;
            case ':': // SIGN
                m_cf_curr = CC_COLON|CF_PUNCTUATION;
                break;
            case '@': // SIGN
                m_cf_curr = CC_AT|CF_PUNCTUATION;
                break;
            case '<': // SIGN
                m_cf_curr = CC_LESS|CF_PUNCTUATION;
                break;
            case '>': // SIGN
                m_cf_curr = CC_MORE|CF_PUNCTUATION|CA_TODO_STATUS;
                break;
            case '\t':
                m_cf_curr = CC_TAB|CF_SPACE;
                break;
            // LIST CHARS
            case '~':
                m_cf_curr = CC_TILDE|CF_PUNCTUATION|CA_TODO_STATUS;
                break;
            case '+':
                m_cf_curr = CC_PLUS|CF_PUNCTUATION|CA_TODO_STATUS;
                break;
            case 'x':
            case 'X':
                m_cf_curr = CC_X|CF_ALPHA|CA_TODO_STATUS;
                break;
            default:
                m_cf_curr = is_char_alpha( m_char_curr ) ?
                        CC_ALPHA|CF_ALPHA : CC_PUNCT|CF_PUNCTUATION;
                break;
        }

        process_char();
    }

    // end of the text -treated like new line for all means and purposes
    if( m_pos_end > 0 ) // only when finish is not forced
    {
        m_char_last = m_char_curr;
        m_char_curr = '\n';
        m_cf_curr = CC_NEWLINE|CF_NEWLINE;
        process_char();
    }
}

// Advances this recipe by the parser's current character and returns the new state.
//  - rejected straight away when the recipe's id is among the parser's blocked flags;
//  - when the character matches the current step, the step's applier (if any) runs and
//    the recipe moves on to the next step;
//  - when it does not match, the recipe survives only if the character still matches the
//    previous step (this is what lets a step consume a run of characters);
//  - accepted once every step has been consumed.
// While RS_BLOCK is set on the recipe, its m_blocks ids are added to the parser's
// blocked flags.
EntryParser::Recipe::State
EntryParser::Recipe::process_char()
{
    if( m_parent->m_blocked_flags & m_id )
    {
        m_state = RS_REJECTED;
        return m_state;
    }

    const auto& step = m_contents->at( m_index );
    const CharClass expected{ step.flags };

    if( cmp_chars( expected, m_parent->m_cf_curr ) )
    {
        if( step.applier )
        {
            m_parent->m_recipe_curr = this;
            // the applier may overwrite m_state, m_index or even m_contents
            ( m_parent->*step.applier )();
        }

        // keep whatever the applier decided, otherwise mark as running
        if( !( m_state & RS_IN_PROGRESS ) )
            m_state = ( m_state & RS_BLOCK ) | RS_IN_PROGRESS;

        ++m_index;
    }
    else if( m_index == 0 ||
             !cmp_chars( m_contents->at( m_index - 1 ).flags, m_parent->m_cf_curr ) )
    {
        m_state = RS_REJECTED;
    }

    const bool all_steps_done = ( m_index == m_contents->size() );
    if( all_steps_done )
        m_state = ( m_state & RS_BLOCK ) | RS_ACCEPTED;

    if( m_state & RS_BLOCK )
        m_parent->m_blocked_flags |= m_blocks;

    return m_state;
}

inline void
EntryParser::process_char()
{
    m_blocked_flags = 0;

    // UPDATE WORD LAST
    if( m_cf_curr & CF_ALPHA )
    {
        if( not( m_cf_last & CF_ALPHA ) )
        {
            m_word_curr.clear();
            m_word_count++;
        }

        m_word_curr += m_char_curr;
    }
    else if( m_cf_curr & CM_BLANK )
    {
        m_pos_blank = m_pos_curr;
        if( m_flag_check_word )
            check_word();
    }

    // FIRST CHECK ACTIVE RECIPES
    for( auto i = m_active_recipes.begin(); i != m_active_recipes.end(); )
    {
        Recipe* r{ *i };
        if( !( r->process_char() & Recipe::RS_IN_PROGRESS ) )
        {
            i = m_active_recipes.erase( i );
            delete r;
        }
        else
            i++;
    }

    // THEN CHECK IF IT TRIGGERS ANY OTHER RECIPE
    for( Recipe* r : m_all_recipes )
    {
        r->m_index = 0;
        r->m_state = Recipe::RS_NOT_SET;
        if( r->process_char() == Recipe::RS_IN_PROGRESS )
            m_active_recipes.push_back( new Recipe( r ) );
    }

    m_cf_last = m_cf_curr;
}

// JUNCTIONS =======================================================================================
// Final step of the date recipe: stores the last parsed number as the day and, when the
// resulting date is valid, hands it over to junction_link() as a date link.
void
EntryParser::check_date()
{
    m_date_last.set_day( m_int_last );

    if( !m_date_last.is_valid() )
        return;

    m_link_type_last = RID_DATE;
    junction_link();
}

// Called on the character that follows "newline + space": a second space means a
// sub-sub-heading, anything else a sub-heading.
void
EntryParser::junction_subheading()
{
    if( m_char_curr != ' ' )
    {
        apply_subheading();
        return;
    }

    apply_subsubheading();
}

// Called when a markup character follows a blank. Turns the running markup recipe into
// the closing recipe for that particular character: records the start position, rewinds
// the step index, raises RS_BLOCK and swaps in the matching id / blocked id / step table.
void
EntryParser::junction_markup()
{
    set_start();

    auto& recipe = *m_recipe_curr;
    recipe.m_index = 0;    // as it will be ++
    recipe.m_state |= Recipe::RS_BLOCK;

    const auto markup_char = get_char_at( m_pos_curr );

    if( markup_char == '*' )
    {
        recipe.m_id = RID_BOLD;
        recipe.m_blocks = RID_BOLD;
        recipe.m_contents = &m_rc_markup_b_end;
    }
    else if( markup_char == '_' )
    {
        recipe.m_id = RID_ITALIC;
        recipe.m_blocks = RID_ITALIC;
        recipe.m_contents = &m_rc_markup_i_end;
    }
    else if( markup_char == '#' )
    {
        recipe.m_id = RID_HIGHLIGHT;
        recipe.m_blocks = RID_HIGHLIGHT;
        recipe.m_contents = &m_rc_markup_h_end;
    }
    else if( markup_char == '=' )
    {
        recipe.m_id = RID_STRIKETHROUGH;
        recipe.m_blocks = RID_STRIKETHROUGH;
        recipe.m_contents = &m_rc_markup_s_end;
    }
}

// Called while the body of a markup span is being consumed: replaces the opening id of
// the running recipe with the corresponding "*_END" id. Any other id is left untouched.
void
EntryParser::junction_markup2()
{
    auto& id = m_recipe_curr->m_id;

    if( id == RID_BOLD )
        id = RID_MARKUP_B_END;
    else if( id == RID_ITALIC )
        id = RID_MARKUP_I_END;
    else if( id == RID_HIGHLIGHT )
        id = RID_MARKUP_H_END;
    else if( id == RID_STRIKETHROUGH )
        id = RID_MARKUP_S_END;
}

// Called on the space that ends a "[?] " to-do box. The status character sits two
// positions before the current one (between the brackets) and selects the check state.
void
EntryParser::junction_todo()
{
    const auto status = get_char_at( m_pos_curr - 2 );

    if( status == ' ' )
        apply_check_unf();
    else if( status == '~' )
        apply_check_prg();
    else if( status == '+' )
        apply_check_fin();
    else if( status == 'x' || status == 'X' ||
             status == '>' ) // extra sign for distinguishing deferred items
        apply_check_ccl();
}

// Called on the separator between year and month: accepts the last parsed number as the
// year when it lies within [Date::YEAR_MIN, Date::YEAR_MAX], otherwise rejects the recipe.
void
EntryParser::junction_date_dotym()
{
    const bool year_in_range =
            ( m_int_last >= Date::YEAR_MIN && m_int_last <= Date::YEAR_MAX );

    if( !year_in_range )
    {
        m_recipe_curr->m_state = Recipe::RS_REJECTED;
        return;
    }

    m_date_last.set_year( m_int_last );
}

// Called on the separator between month and day: accepts the last parsed number as the
// month when it is 1..12 and this separator equals the one three characters earlier
// (the year/month separator); otherwise rejects the recipe.
void
EntryParser::junction_date_dotmd()
{
    const bool month_in_range = ( m_int_last >= 1 && m_int_last <= 12 );

    // two separators must be the same:
    if( month_in_range && get_char_at( m_pos_curr - 3 ) == m_char_curr )
        m_date_last.set_month( m_int_last );
    else
        m_recipe_curr->m_state = Recipe::RS_REJECTED;
}

void
EntryParser::junction_colon()
{
    PRINT_DEBUG( "word_curr: ", m_word_curr );

    Recipe::Contents* rc{ nullptr };

    if( m_word_curr == "http" || m_word_curr == "https" ||
        m_word_curr == "ftp" || m_word_curr == "file" || m_word_curr == "rel" )
    {
        m_link_type_last = RID_URL;
        rc =  &m_rc_link_file;
    }
    else
    if( m_word_curr == "mailto" )
    {
        m_link_type_last = RID_URL;
        rc = &m_rc_link_email;
    }
    else
    if( m_word_curr == "deid" )
    {
        m_link_type_last = RID_ID;
        rc = &m_rc_link_id;
    }
    else
    if( m_word_curr == "chart" && m_pos_curr > 5 && get_char_at( m_pos_curr - 6 ) == '\n' )
    {
        m_link_type_last = RID_CHART;
        m_active_recipes.push_back(
            new Recipe{ RID_CHART, this, &m_rc_chart, RID_AT,
                        m_pos_curr - m_word_curr.length(), m_pos_curr + 1 } );
    }

    if( rc )
        m_active_recipes.push_back(
            new Recipe{ RID_LINK_END, this, rc, RID_AT, m_pos_curr - m_word_curr.length(), 0 } );
}

void
EntryParser::junction_at()
{
    m_active_recipes.push_back(
            new Recipe{ RID_EMAIL_END, this, &m_rc_email_end, 0, m_pos_blank + 1, 0 } );
}

// Called when a link target has been read completely. If the target is directly preceded
// by '<', the link may carry hidden-target text, so a follow-up recipe is started to
// parse the remainder up to '>'. Otherwise the link is applied as it is.
void
EntryParser::junction_link()
{
    const auto link_start = m_recipe_curr->m_pos_start;
    const bool follows_less_sign =
            link_start > 0 && get_char_at( link_start - 1 ) == '<';

    if( !follows_less_sign )
    {
        // id links carry the parsed number along
        if( m_link_type_last == RID_ID )
            m_recipe_curr->m_int_value = m_int_last;
        apply_link();
        return;
    }

    m_active_recipes.push_back(
            new Recipe{ RID_LINK_END, this, &m_rc_link_end, 0,
                        link_start - 1, m_pos_curr } );
}

// HELPERS =========================================================================================
// Applier: stores the current position as the start position of the recipe that is
// being processed (m_recipe_curr).
inline void
EntryParser::set_start()
{
    m_recipe_curr->m_pos_start = m_pos_curr;
}

// Applier: stores the current position as the middle position of the recipe that is
// being processed (m_recipe_curr); the link-end recipe uses it on its tab step.
inline void
EntryParser::set_middle()
{
    m_recipe_curr->m_pos_middle = m_pos_curr;
}

// Applier: raises RS_BLOCK on the recipe that is being processed. While that flag is set,
// Recipe::process_char() adds the recipe's m_blocks ids to the parser's blocked flags.
inline void
EntryParser::add_block()
{
    m_recipe_curr->m_state |= Recipe::RS_BLOCK;
}

// Keeps m_int_last equal to the decimal value of the digit run that ends at the current
// character: a digit that follows another digit extends the number, any other digit
// starts a new one.
// NOTE(review): there is no overflow guard for very long digit runs -- confirm the type
// of m_int_last if that matters.
inline void
EntryParser::handle_number()
{
    const bool continues_number = ( m_cf_last & CF_NUMBER );

    if( !continues_number )
        m_int_last = 0;

    m_int_last *= 10;
    m_int_last += ( m_char_curr - '0' );
}
