/*
This source file is part of Scol
For the latest info, see http://www.scolring.org

Copyright (c) 2010 Stephane Bisaro, aka Iri <iri@irizone.net>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
http://www.gnu.org/copyleft/lesser.txt

For others informations, please contact us from http://www.scolring.org/
*/

#ifdef __cplusplus
#error This source file is not C++ but rather C. Please use a C-compiler
#endif



#include "../include/scol_glib_pregex.h"

/* http://library.gnome.org/devel/glib/unstable/glib-Perl-compatible-regular-expressions.html */

/* Machine handles shared by the plugin; both are assigned in SCOLloadGBASE */
#if ((defined _WIN32) || (defined __WIN32__))
/* Callback machine : this file only defines it on Windows builds */
cbmachine ww;
#endif
/* Scol machine */
mmachine  mm;

/*
    Easy interface

    - SCOL_pcreEasyMatch
    - SCOL_pcreEasySplit
    - SCOL_pcreEasyReplace
*/


/**
 * \brief Tests whether a regular expression matches somewhere in a string
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to scan for a match
 * \return : I : 1 if a match was found, else 0 (nil if string or pattern is nil)
 *
 * The regular expression is compiled with no compile option (0)
 * and matched with no match option (0).
 */
int SCOL_pcreEasyMatch (mmachine m)
{
    int subject_ref, pattern_ref;
    gchar *utf8_pattern, *utf8_subject;
    gboolean found;

    MMechostr (MSKDEBUG, "SCOL_pcreEasyMatch : entering\n");

    /* Arguments are pulled in reverse order : the string first, then the pattern */
    subject_ref = MTOP (MMpull (m));
    pattern_ref = MTOP (MMpull (m));

    if (!((subject_ref != NIL) && (pattern_ref != NIL)))
    {
        MMechostr (0, "SCOL_pcreEasyMatch error : an argument is nil");
        MMpush (m, NIL);
        return 0;
    }

    /* Both conversions return buffers that are released with g_free below */
    utf8_pattern = SCOLUTF8 (MMstartstr (m, pattern_ref), MMsizestr (m, pattern_ref));
    utf8_subject = SCOLUTF8 (MMstartstr (m, subject_ref), MMsizestr (m, subject_ref));

    found = g_regex_match_simple (utf8_pattern, utf8_subject, 0, 0);
    MMpush (m, ITOM (found));

    g_free (utf8_pattern);
    g_free (utf8_subject);
    return 0;
}

/**
 * \brief Breaks the string on the pattern, and returns a list of the tokens.
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to split
 * \return : [S r1] : the list of substrings, or nil if the string is nil or
 * if the split failed (e.g. the pattern can not be compiled)
 *
 * If the pattern is nil, the returned list contains the string alone.
 * The compile options for the regular expression are at 0
 * The match options are at 0 too
 */
int SCOL_pcreEasySplit (mmachine m)
{
    int mpattern, mstring;
    int i, count;
    gchar *pattern, *string;
    gchar **result;

    MMechostr (MSKDEBUG, "SCOL_pcreEasySplit : entering\n");

    /* Arguments are pulled in reverse order : the string first, then the pattern */
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if (mstring == NIL)
    {
        MMechostr (0, "SCOL_pcreEasySplit error : an argument is nil");
        MMpush (m, NIL);
        return 0;
    }
    if (mpattern == NIL)
    {
        /* Nothing to split on : return the one-element list string::nil */
        Mpushstrbloc (m, MMstartstr (m, mstring));
        MMpush (m, NIL);
        MMpush (m, ITOM (2));
        MBdeftab (m);
        return 0;
    }

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    result = g_regex_split_simple (pattern, string, 0, 0);
    g_free (pattern);
    g_free (string);

    /* g_regex_split_simple returns NULL when the pattern can not be compiled */
    if (result == NULL)
    {
        MMechostr (0, "SCOL_pcreEasySplit error : unable to split the string\n");
        MMpush (m, NIL);
        return 0;
    }

    /* Push every token, then nil, then chain them from the last to the first */
    for (count = 0; result[count] != NULL; count++)
    {
        gchar *token = UTF8SCOL (result[count], strlen (result[count]));

        if (token != NULL)
            Mpushstrbloc (m, token);
        else
            MMpush (m, NIL);
        /* UTF8SCOL is defined outside this file : release its result only
           when it is a buffer distinct from the one g_strfreev will free */
        if (token != result[count])
            g_free (token);
    }
    g_strfreev (result);

    MMpush (m, NIL);
    for (i = 0; i < count; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}

/**
 * \brief Replaces all occurrences of the pattern in string with a replacement text
 * \param S : pattern
 * \param S : string
 * \param S : replacement text
 * \return S : new string containing the replacements, or nil if the string is
 * nil or if the replacement failed (e.g. the pattern can not be compiled)
 *
 * If the pattern or the replacement text is nil, the string is returned as is.
 * The compile options and the match options are at 0.
 */
int SCOL_pcreEasyReplace (mmachine m)
{
    int mpattern, mstring, mreplace;
    GRegex *regex = NULL;
    gchar *replaced = NULL;
    gchar *result;
    gchar *pattern, *string, *replace;

    MMechostr (MSKDEBUG, "SCOL_pcreEasyReplace : entering\n");

    /* Arguments are pulled in reverse order */
    mreplace = MTOP (MMpull (m));
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if(mstring == NIL)
    {
        MMechostr (0, "SCOL_pcreEasyReplace error : an argument is nil");
        MMpush (m, NIL);
        return 0;
    }
    if ((mreplace == NIL) || (mpattern == NIL))
    {
        Mpushstrbloc (m, MMstartstr (m, mstring));
        return 0;
    }

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));

    /* Never hand a NULL string to strlen nor a NULL regex to g_regex_replace */
    if ((pattern != NULL) && (string != NULL) && (replace != NULL))
        regex = scol_pcre_regex_new (pattern, 0, 0);
    if (regex != NULL)
    {
        replaced = g_regex_replace (regex, string, strlen (string), 0, replace, 0, NULL);
        g_regex_unref (regex);
    }
    g_free (pattern);
    g_free (string);
    g_free (replace);

    if (replaced == NULL)
    {
        MMechostr (0, "SCOL_pcreEasyReplace error : unable to perform the replacement\n");
        MMpush (m, NIL);
        return 0;
    }

    result = UTF8SCOL (replaced, strlen (replaced));
    if (result != NULL)
        Mpushstrbloc (m, result);
    else
        MMpush (m, NIL);
    /* UTF8SCOL is defined outside this file : release its result only when
       it is a buffer distinct from the one returned by g_regex_replace */
    if (result != replaced)
        g_free (result);
    g_free (replaced);
    return 0;
}


/*
    Normal interface

    - SCOL_pcreNormalMatch
    - SCOL_pcreNormalSplit
    - SCOL_pcreNormalReplace
*/

/**
 * \brief Scans a string for all the matches of a pattern
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to scan for matches
 * \param : I : compile flags (PCRE_REGEX_*), nil is the same as 0
 * \param : I : start position in the string (a negative value or nil is the same as 0)
 * \param : I : match flags (PCRE_MATCH_*), nil is the same as 0
 * \param : I : algorithm : PCRE_MATCH_DFA, any other value selects PCRE_MATCH_STANDARD
 * \return : [[S I I] r1] : list of [matched text, start position, end position],
 * or nil if string or pattern is nil
 */
int SCOL_pcreNormalMatch (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmatch, malgo;
    gchar * pattern, *string;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalMatch : entering\n");

    /* Arguments are pulled in reverse order. The two flag values are kept raw
       so that they can be compared with NIL before being decoded. */
    malgo = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalMatch error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart< 0)
        mstart = 0;

    if (malgo != MATCH_DFA)
        malgo = MATCH_STANDARD;

    /* The PCRE_* words push their value with ITOM, so the flags must be
       decoded with MTOI like the other integer arguments */
    mcompile = (mcompile == NIL) ? 0 : MTOI (mcompile);
    mmatch = (mmatch == NIL) ? 0 : MTOI (mmatch);

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    if ((pattern == NULL) || (string == NULL))
    {
        /* The helpers call strlen on the string : never hand them NULL */
        MMechostr (0, "SCOL_pcreNormalMatch error : string conversion failed\n");
        MMpush (m, NIL);
    }
    else if (malgo == MATCH_STANDARD)
        scol_pcre_match_standard (m, pattern, string, mcompile, mmatch, mstart);
    else
        scol_pcre_match_DFA (m, pattern, string, mcompile, mmatch, mstart);

    g_free (pattern);
    g_free (string);
    return 0;
}

/**
 * \brief Breaks the string on the pattern, and returns a list of the tokens
 * \param : S : pattern : the regular expression
 * \param : S : string : the string to split
 * \param : I : compile flags (PCRE_REGEX_*), nil is the same as 0
 * \param : I : start position in the string (a negative value or nil is the same as 0)
 * \param : I : match flags (PCRE_MATCH_*), nil is the same as 0
 * \param : I : maximum number of tokens, passed as is to g_regex_split_full
 * \return : [S r1] : the list of substrings, or nil if string or pattern is
 * nil or if an error occurs
 */
int SCOL_pcreNormalSplit (mmachine m)
{
    int mpattern, mstring, mcompile, mstart, mmax, mmatch;
    int i, count;
    gchar *pattern, *string;
    GRegex *regex = NULL;
    GError *error = NULL;
    gchar **result = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalSplit : entering\n");

    /* Arguments are pulled in reverse order. The two flag values are kept raw
       so that they can be compared with NIL before being decoded. */
    mmax = MTOI (MMpull (m));
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if ((mstring == NIL) || (mpattern == NIL))
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : string or pattern is nil\n");
        MMpush (m, NIL);
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    /* The PCRE_* words push their value with ITOM, so the flags must be
       decoded with MTOI like the other integer arguments */
    mcompile = (mcompile == NIL) ? 0 : MTOI (mcompile);
    mmatch = (mmatch == NIL) ? 0 : MTOI (mmatch);

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));

    /* Never hand a NULL string to strlen nor a NULL regex to g_regex_split_full */
    if ((pattern != NULL) && (string != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if (regex != NULL)
    {
        result = g_regex_split_full (regex, string, strlen (string), mstart, mmatch, mmax, &error);
        g_regex_unref (regex);
    }
    g_free (pattern);
    g_free (string);

    if (error != NULL)
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : %s\n", error->message);
        g_error_free (error);
        g_strfreev (result);
        result = NULL;
    }
    else if (result == NULL)
    {
        MMechostr (0, "SCOL_pcreNormalSplit error : unable to split the string\n");
    }

    /* On any failure exactly one value (nil) is returned to the machine */
    if (result == NULL)
    {
        MMpush (m, NIL);
        return 0;
    }

    /* Push every token, then nil, then chain them from the last to the first */
    for (count = 0; result[count] != NULL; count++)
    {
        gchar *token = UTF8SCOL (result[count], strlen (result[count]));

        if (token != NULL)
            Mpushstrbloc (m, token);
        else
            MMpush (m, NIL);
        /* UTF8SCOL is defined outside this file : release its result only
           when it is a buffer distinct from the one g_strfreev will free */
        if (token != result[count])
            g_free (token);
    }
    g_strfreev (result);

    MMpush (m, NIL);
    for (i = 0; i < count; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return 0;
}

/**
 * \brief Replaces all occurrences of the pattern in string with a replacement text
 * \param : S : pattern : the regular expression
 * \param : S : string : the string in which the replacements are done
 * \param : S : replacement text
 * \param : I : compile flags (PCRE_REGEX_*), nil is the same as 0
 * \param : I : start position in the string (a negative value or nil is the same as 0)
 * \param : I : match flags (PCRE_MATCH_*), nil is the same as 0
 * \return : S : new string containing the replacements, or nil if the string
 * is nil or if an error occurs
 *
 * If the pattern or the replacement text is nil, the string is returned as is.
 */
int SCOL_pcreNormalReplace (mmachine m)
{
    int mpattern, mstring, mreplace, mcompile, mstart, mmatch;
    gchar *pattern, *string, *replace;
    gchar *result = NULL;
    gchar *converted;
    GRegex *regex = NULL;
    GError *error = NULL;

    MMechostr (MSKDEBUG, "SCOL_pcreNormalReplace : entering\n");

    /* Arguments are pulled in reverse order. The two flag values are kept raw
       so that they can be compared with NIL before being decoded. */
    mmatch = MMpull (m);
    mstart = MTOI (MMpull (m));
    mcompile = MMpull (m);
    mreplace = MTOP (MMpull (m));
    mstring = MTOP (MMpull (m));
    mpattern = MTOP (MMpull (m));

    if(mstring == NIL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : string is nil");
        MMpush (m, NIL);
        return 0;
    }
    if ((mreplace == NIL) || (mpattern == NIL))
    {
        Mpushstrbloc (m, MMstartstr (m, mstring));
        return 0;
    }

    if (mstart < 0)
        mstart = 0;
    /* The PCRE_* words push their value with ITOM, so the flags must be
       decoded with MTOI like the other integer arguments */
    mcompile = (mcompile == NIL) ? 0 : MTOI (mcompile);
    mmatch = (mmatch == NIL) ? 0 : MTOI (mmatch);

    pattern = SCOLUTF8 (MMstartstr (m, mpattern), MMsizestr (m, mpattern));
    string = SCOLUTF8 (MMstartstr (m, mstring), MMsizestr (m, mstring));
    replace = SCOLUTF8 (MMstartstr (m, mreplace), MMsizestr (m, mreplace));

    /* Never hand a NULL string to strlen nor a NULL regex to g_regex_replace */
    if ((pattern != NULL) && (string != NULL) && (replace != NULL))
        regex = scol_pcre_regex_new (pattern, mcompile, mmatch);
    if (regex != NULL)
    {
        result = g_regex_replace (regex, string, strlen (string), mstart, replace, mmatch, &error);
        g_regex_unref (regex);
    }
    g_free (pattern);
    g_free (string);
    g_free (replace);

    if (error != NULL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : %s\n", error->message);
        g_error_free (error);
        g_free (result);
        result = NULL;
    }
    else if (result == NULL)
    {
        MMechostr (0, "SCOL_pcreNormalReplace error : unable to perform the replacement\n");
    }

    /* On any failure exactly one value (nil) is returned to the machine */
    if (result == NULL)
    {
        MMpush (m, NIL);
        return 0;
    }

    converted = UTF8SCOL (result, strlen (result));
    if (converted != NULL)
        Mpushstrbloc (m, converted);
    else
        MMpush (m, NIL);
    /* UTF8SCOL is defined outside this file : release its result only when
       it is a buffer distinct from the one returned by g_regex_replace */
    if (converted != result)
        g_free (converted);
    g_free (result);
    return 0;
}




/*
    Internal functions
    - scol_pcre_regex_new
        GRegex * scol_pcre_regex_new (const gchar *, GRegexCompileFlags, GRegexMatchFlags);
        Create a new regular expression

    - scol_pcre_match_standard
        void scol_pcre_match_standard (mmachine, const gchar *, const gchar *, GRegexCompileFlags, GRegexMatchFlags, gint)

    - scol_pcre_match_DFA
        void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)

*/

/**
 * \brief Compiles a regular expression
 * \param pattern : the regular expression
 * \param compile : compile options
 * \param match : match options stored with the compiled expression
 * \return the new GRegex (to release with g_regex_unref), or NULL if the
 * pattern can not be compiled; the reason is then written to the log
 */
GRegex * scol_pcre_regex_new (const gchar * pattern, GRegexCompileFlags compile, GRegexMatchFlags match)
{
    GError *error = NULL;
    GRegex *regex;

    regex = g_regex_new (pattern, compile, match, &error);
    if (error != NULL)
    {
        /* Report why the compilation failed instead of dropping the reason */
        MMechostr (0, "scol_pcre_regex_new error : %s\n", error->message);
        g_error_free (error);
    }
    return regex;
}


/**
 * \brief Pushes the list of the matches found with the standard algorithm
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan for matches
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in the string
 *
 * Exactly one value is pushed : a list of [matched text, start position,
 * end position] tuples, or nil if nothing matches or if the pattern can not
 * be compiled.
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied by the split / replace functions - confirm
 * that this is intended.
 */
void scol_pcre_match_standard (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1 = -1, pos2 = -1;

    /* strlen is called on the string below : never compile for a NULL string */
    if (string != NULL)
        regex = scol_pcre_regex_new (pattern, compile, match);

    if (regex == NULL)
    {
        /* Without a regex GLib leaves match_info untouched : stop here */
        MMechostr (0, "scol_pcre_match_standard error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_full (regex, string, strlen (string), start, match, &match_info, &error);
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        /* One tuple [text start end] per match */
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        pos1 = -1; pos2 = -1;
        g_match_info_next (match_info, &error);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_standard error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n tuples into a list, from the last to the first */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}

/**
 * \brief Pushes the list of the matches found with g_regex_match_all_full
 * \param m : the Scol machine
 * \param pattern : the regular expression
 * \param string : the string to scan for matches
 * \param compile : compile options
 * \param match : match options
 * \param start : start position in the string
 *
 * Exactly one value is pushed : a list of [matched text, start position,
 * end position] tuples, or nil if nothing matches or if the pattern can not
 * be compiled.
 * NOTE(review): the matched text is pushed as returned by GLib, without the
 * UTF8SCOL conversion applied by the split / replace functions - confirm
 * that this is intended.
 */
void scol_pcre_match_DFA (mmachine m, const gchar *pattern, const gchar *string, GRegexCompileFlags compile, GRegexMatchFlags match, gint start)
{
    GRegex *regex = NULL;
    GMatchInfo *match_info = NULL;
    GError *error = NULL;
    gchar *word;
    int i, n = 0;
    gint pos1 = -1, pos2 = -1;

    /* strlen is called on the string below : never compile for a NULL string */
    if (string != NULL)
        regex = scol_pcre_regex_new (pattern, compile, match);

    if (regex == NULL)
    {
        /* Without a regex GLib leaves match_info untouched : stop here */
        MMechostr (0, "scol_pcre_match_DFA error : invalid pattern or string\n");
        MMpush (m, NIL);
        return;
    }

    g_regex_match_all_full (regex, string, strlen (string), start, match, &match_info, &error);
    while ((match_info != NULL) && g_match_info_matches (match_info))
    {
        /* One tuple [text start end] per match */
        word = g_match_info_fetch (match_info, 0);
        g_match_info_fetch_pos (match_info, 0, &pos1, &pos2);
        Mpushstrbloc (m, word);
        MMpush (m, ITOM (pos1));
        MMpush (m, ITOM (pos2));
        MMpush (m, ITOM (3));
        MBdeftab (m);
        g_free (word);
        pos1 = -1; pos2 = -1;
        g_match_info_next (match_info, &error);
        n++;
    }

    g_match_info_free (match_info);
    g_regex_unref (regex);
    if (error != NULL)
    {
        MMechostr (0, "scol_pcre_match_DFA error : %s\n", error->message);
        g_error_free (error);
    }

    /* Chain the n tuples into a list, from the last to the first */
    MMpush (m, NIL);
    for (i = 0; i < n; i++)
    {
        MMpush (m, ITOM (2));
        MBdeftab (m);
    }
    return;
}



/* FLAGS */

/* Algorithm selectors : the last argument of _pcreNormalMatch is compared with them */

/* Pushes MATCH_STANDARD as a Scol integer */
int PCRE_MATCH_STANDARD (mmachine m)
{
    return MMpush (m, ITOM (MATCH_STANDARD));
}

/* Pushes MATCH_DFA as a Scol integer */
int PCRE_MATCH_DFA (mmachine m)
{
    return MMpush (m, ITOM (MATCH_DFA));
}

/* Match flags : each word pushes the MATCH_* constant of the same name as a Scol integer */

int PCRE_MATCH_ANCHORED (mmachine m)
{
    return MMpush (m, ITOM (MATCH_ANCHORED));
}

int PCRE_MATCH_NOTBOL (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NOTBOL));
}

int PCRE_MATCH_NOTEOL (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NOTEOL));
}

int PCRE_MATCH_NOTEMPTY (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NOTEMPTY));
}

int PCRE_MATCH_PARTIAL (mmachine m)
{
    return MMpush (m, ITOM (MATCH_PARTIAL));
}

int PCRE_MATCH_NEWLINE_CR (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NEWLINE_CR));
}

int PCRE_MATCH_NEWLINE_LF (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NEWLINE_LF));
}

int PCRE_MATCH_NEWLINE_CRLF (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NEWLINE_CRLF));
}

int PCRE_MATCH_NEWLINE_ANY (mmachine m)
{
    return MMpush (m, ITOM (MATCH_NEWLINE_ANY));
}

/* Compile flags : each word pushes the REGEX_* constant of the same name as a Scol integer */

int PCRE_REGEX_CASELESS (mmachine m)
{
    return MMpush (m, ITOM (REGEX_CASELESS));
}

int PCRE_REGEX_MULTILINE (mmachine m)
{
    return MMpush (m, ITOM (REGEX_MULTILINE));
}

int PCRE_REGEX_DOTALL (mmachine m)
{
    return MMpush (m, ITOM (REGEX_DOTALL));
}

int PCRE_REGEX_EXTENDED (mmachine m)
{
    return MMpush (m, ITOM (REGEX_EXTENDED));
}

int PCRE_REGEX_ANCHORED (mmachine m)
{
    return MMpush (m, ITOM (REGEX_ANCHORED));
}

int PCRE_REGEX_DOLLAR_ENDONLY (mmachine m)
{
    return MMpush (m, ITOM (REGEX_DOLLAR_ENDONLY));
}

int PCRE_REGEX_UNGREEDY (mmachine m)
{
    return MMpush (m, ITOM (REGEX_UNGREEDY));
}

int PCRE_REGEX_RAW (mmachine m)
{
    return MMpush (m, ITOM (REGEX_RAW));
}

int PCRE_REGEX_NO_AUTO_CAPTURE (mmachine m)
{
    return MMpush (m, ITOM (REGEX_NO_AUTO_CAPTURE));
}

int PCRE_REGEX_OPTIMIZE (mmachine m)
{
    return MMpush (m, ITOM (REGEX_OPTIMIZE));
}

int PCRE_REGEX_DUPNAMES (mmachine m)
{
    return MMpush (m, ITOM (REGEX_DUPNAMES));
}

int PCRE_REGEX_NEWLINE_CR (mmachine m)
{
    return MMpush (m, ITOM (REGEX_NEWLINE_CR));
}

int PCRE_REGEX_NEWLINE_LF (mmachine m)
{
    return MMpush (m, ITOM (REGEX_NEWLINE_LF));
}

int PCRE_REGEX_NEWLINE_CRLF (mmachine m)
{
    return MMpush (m, ITOM (REGEX_NEWLINE_CRLF));
}


/* API definitions : */

/* Names of the package entries, as passed to PKhardpak.
   The entry at a given index here goes with the entries at the same index in
   glib_pcre_fun, glib_pcre_narg and glib_pcre_type : keep the four tables in
   the same order. */
char* glib_pcre_name[GLIB_PCRE_PKG_NB]=
{
    /* algorithm selectors */
    "PCRE_MATCH_STANDARD", "PCRE_MATCH_DFA",

    /* match flags */
    "PCRE_MATCH_ANCHORED", "PCRE_MATCH_NOTBOL", "PCRE_MATCH_NOTEOL",
    "PCRE_MATCH_NOTEMPTY", "PCRE_MATCH_PARTIAL", "PCRE_MATCH_NEWLINE_CR",
    "PCRE_MATCH_NEWLINE_LF", "PCRE_MATCH_NEWLINE_CRLF", "PCRE_MATCH_NEWLINE_ANY",

    /* compile flags */
    "PCRE_REGEX_CASELESS", "PCRE_REGEX_MULTILINE", "PCRE_REGEX_DOTALL",
    "PCRE_REGEX_EXTENDED", "PCRE_REGEX_ANCHORED", "PCRE_REGEX_DOLLAR_ENDONLY",
    "PCRE_REGEX_UNGREEDY", "PCRE_REGEX_RAW", "PCRE_REGEX_NO_AUTO_CAPTURE",
    "PCRE_REGEX_OPTIMIZE", "PCRE_REGEX_DUPNAMES", "PCRE_REGEX_NEWLINE_CR",
    "PCRE_REGEX_NEWLINE_LF", "PCRE_REGEX_NEWLINE_CRLF",

    /* easy interface */
    "_pcreEasyMatch",
    "_pcreEasySplit",
    "_pcreEasyReplace",

    /* normal interface */
    "_pcreNormalMatch",
    "_pcreNormalSplit",
    "_pcreNormalReplace"
};

/* C functions implementing the package entries, in the same order as glib_pcre_name */
int (*glib_pcre_fun[GLIB_PCRE_PKG_NB])(mmachine m)=
{
    /* algorithm selectors */
    PCRE_MATCH_STANDARD, PCRE_MATCH_DFA,

    /* match flags */
    PCRE_MATCH_ANCHORED, PCRE_MATCH_NOTBOL, PCRE_MATCH_NOTEOL,
    PCRE_MATCH_NOTEMPTY, PCRE_MATCH_PARTIAL, PCRE_MATCH_NEWLINE_CR,
    PCRE_MATCH_NEWLINE_LF, PCRE_MATCH_NEWLINE_CRLF, PCRE_MATCH_NEWLINE_ANY,

    /* compile flags */
    PCRE_REGEX_CASELESS, PCRE_REGEX_MULTILINE, PCRE_REGEX_DOTALL,
    PCRE_REGEX_EXTENDED, PCRE_REGEX_ANCHORED, PCRE_REGEX_DOLLAR_ENDONLY,
    PCRE_REGEX_UNGREEDY, PCRE_REGEX_RAW, PCRE_REGEX_NO_AUTO_CAPTURE,
    PCRE_REGEX_OPTIMIZE, PCRE_REGEX_DUPNAMES, PCRE_REGEX_NEWLINE_CR,
    PCRE_REGEX_NEWLINE_LF, PCRE_REGEX_NEWLINE_CRLF,

    /* easy interface */
    SCOL_pcreEasyMatch,
    SCOL_pcreEasySplit,
    SCOL_pcreEasyReplace,

    /* normal interface */
    SCOL_pcreNormalMatch,
    SCOL_pcreNormalSplit,
    SCOL_pcreNormalReplace
};

/* Number of arguments of the package entries, in the same order as
   glib_pcre_name. Each value is the number of MMpull calls done by the
   matching function. */
int glib_pcre_narg[GLIB_PCRE_PKG_NB]=
{
    /* algorithm selectors */
    0, 0,

    /* match flags */
    0, 0, 0,
    0, 0, 0,
    0, 0, 0,

    /* compile flags */
    0, 0, 0,
    0, 0, 0,
    0, 0, 0,
    0, 0, 0,
    0, 0,

    /* easy interface : match, split, replace */
    2,
    2,
    3,

    /* normal interface : match, split, replace */
    6,
    6,
    6
};

/* Scol type signatures of the package entries, in the same order as glib_pcre_name */
char* glib_pcre_type[GLIB_PCRE_PKG_NB]=
{
    /* algorithm selectors */
    "fun [] I", "fun [] I",

    /* match flags */
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I", "fun [] I",

    /* compile flags */
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I", "fun [] I",
    "fun [] I", "fun [] I",

    /* easy interface : match, split, replace */
    "fun [S S] I",
    "fun [S S] [S r1]",
    "fun [S S S] S",

    /* normal interface : match, split, replace */
    "fun [S S I I I I] [[S I I] r1]",
    "fun [S S I I I I] [S r1]",
    "fun [S S S I I I] S"
};

/**
 * \brief Registers the "PCREengine" package in the Scol machine
 * \param m : the Scol machine
 * \return the value returned by PKhardpak
 *
 * The package is described by the four glib_pcre_* tables.
 */
int SCOLinitPcreClass (mmachine m)
{
    MMechostr (0, "SCOLinitPcreClass : entering\n");

    return PKhardpak (m, "PCREengine", GLIB_PCRE_PKG_NB,
                      glib_pcre_name, glib_pcre_fun,
                      glib_pcre_narg, glib_pcre_type);
}


/**
 * \brief Called by SCOLfreeGBASE when the plugin is released
 * \return always 0
 *
 * There is nothing to free : the function only writes a message to the log.
 */

int PcreRelease (void)
{
    MMechostr (0, "\nPCRE library released !\n");
    return 0;
}




/* Export qualifier of the two plugin entry points below :
   __declspec (dllexport) on Windows, nothing on Linux.
   Any other platform stops the build. */
#if ((defined _WIN32) || (defined __WIN32__))
# define SCOL_GBASE_PLUGIN_EXPORT __declspec (dllexport)
#elif ((defined linux) || (defined __linux))
# define SCOL_GBASE_PLUGIN_EXPORT
#else
# error no platform supported
#endif

/**
 * \brief Exported loader : stores the two machine handles and registers the PCRE package
 * \param m : the Scol machine
 * \param w : the callback machine
 * \return the value returned by SCOLinitPcreClass
 */
SCOL_GBASE_PLUGIN_EXPORT int SCOLloadGBASE(mmachine m, cbmachine w)
{
    /* NOTE(review): this file only defines ww on Windows builds - confirm
       that the other builds get it from an included header */
    ww = w;
    mm = m;

    /* SCOLinitplugin is defined outside this file */
    SCOLinitplugin(w);
    return SCOLinitPcreClass (m);
}

/**
 * \brief Exported unloader : releases the PCRE library
 * \return always 0
 */
SCOL_GBASE_PLUGIN_EXPORT int SCOLfreeGBASE()
{
    /* The value returned by PcreRelease is not propagated */
    (void) PcreRelease ();

    return 0;
}

