Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

JRegex.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: JRegex.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.15 $       $Date: 2024/03/01 02:01:37 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *  Interface for performing regular expression pattern matching, 
00019  *  encapsulating the PCRE regular expression package.
00020  ***************************************************************************/
00021 
00022 //
00023 // Online regex info and testing: 
00024 //   https://regex101.com/
00025 //
00026 // PCRE regex library:
00027 //   https://www.pcre.org/
00028 //
00029 
00030 #include "JRegex.h"
00031 #include "Inform.h" 
00032 
00033 #if defined(VMDUSEPCRE2)
00034 #define  PCRE2_CODE_UNIT_WIDTH 8
00035 #include "pcre2.h"
00036 #else
00037 #include "pcre.h"
00038 #endif
00039 
00040 JRegex::JRegex(const char *pattern, int) {
00041   if (pattern == NULL) {
00042     msgErr << "NULL pattern passed to JRegex!" << sendmsg;
00043   }
00044   else {
00045 #if defined(VMDUSEPCRE2)
00046     //
00047     // PCRE2 API
00048     //   https://www.pcre.org/current/doc/html/pcre2api.html
00049     //   https://www.pcre.org/current/doc/html/pcre2demo.html
00050     //
00051     int errornumber=0;
00052     size_t erroroffset=0;
00053     md = NULL;
00054     JIT=0;
00055     rpat = pcre2_compile((PCRE2_SPTR) pattern,   // the regex pattern string
00056                          PCRE2_ZERO_TERMINATED,  // C style string
00057                          0,                      // default options
00058                          &errornumber,           // error number
00059                          &erroroffset,           // offset to error location
00060                          NULL);                  // use default compile context
00061 
00062     if (rpat == NULL) {
00063       PCRE2_UCHAR errbuf[256];
00064       pcre2_get_error_message(errornumber, errbuf, sizeof(errbuf));
00065       msgWarn << "JRegex: Error in pcre2_compile(), " << sendmsg;
00066       msgWarn << "Error in regex pattern begins with " << pattern+erroroffset
00067               << sendmsg;
00068     } else {
00069 #if 1
00070       //
00071       // enable JIT compilation of the regex, for large selection traversals
00072       // https://www.pcre.org/current/doc/html/pcre2jit.html
00073       //
00074       int rc=0;
00075       rc=pcre2_jit_compile((pcre2_code *) rpat, PCRE2_JIT_COMPLETE); 
00076       if (rc && rc != PCRE2_ERROR_JIT_BADOPTION) 
00077         msgWarn << "JRegex: pcre2_jit_compile() returned an error." << sendmsg;
00078 
00079       // check that JIT succeeded so we can use the fast-path if possible
00080       if (!rc) {
00081         size_t len=0;
00082         pcre2_pattern_info((pcre2_code *) rpat, PCRE2_INFO_JITSIZE, &len);
00083 //        msgInfo << "PCRE2 JIT size: " << len << sendmsg;
00084         if (len != 0)
00085           JIT=1;
00086       }
00087 #endif
00088 
00089 //      md = pcre2_match_data_create(1, NULL);
00090       md = pcre2_match_data_create_from_pattern((pcre2_code *) rpat, NULL);
00091     }
00092 #else
00093     const char *errptr;
00094     int erroffset;
00095     rpat = vmdpcre_compile(pattern,    // the regex pattern string
00096                            0,          // options
00097                            &errptr,    // points to error message, if any
00098                            &erroffset, // offset to error location
00099                            NULL);      // Table pointer; NULL for use default
00100     if (rpat == NULL) {
00101       msgWarn << "JRegex: Error in pcre_compile(), " << errptr << sendmsg;
00102       msgWarn << "Error in regex pattern begins with " << pattern+erroffset
00103               << sendmsg;
00104     }
00105 #endif
00106   }
00107 }
00108 
00109 JRegex::~JRegex() {
00110 #if defined(VMDUSEPCRE2)
00111   if (md)
00112     pcre2_match_data_free((pcre2_match_data *) md);  
00113   if (rpat)
00114     pcre2_code_free((pcre2_code *) rpat);
00115 #else
00116   vmdpcre_free(rpat);
00117 #endif
00118 }
00119 
00120 int JRegex::match(const char *str, int len) const {
00121 #if defined(VMDUSEPCRE2)
00122   if (rpat==NULL || md == NULL) {
00123 //  msgWarn << "JRegex::match: bad regex pattern, no match" << sendmsg;
00124     return -1;
00125   } 
00126 
00127   int rc=0;
00128 
00129   // if the regular expression has successfully JITted, we call the 
00130   // JIT fast path to avoid overheads
00131   if (JIT) {
00132     rc=pcre2_jit_match((pcre2_code *) rpat,     // compiled regex pattern
00133                        (PCRE2_SPTR) str,        // subject of the search
00134                        len,                     // strlen of str
00135                        0,                       // match starting offset
00136                        0,                       // options
00137                        (pcre2_match_data *) md, // match data block
00138                        NULL);                   // match ctx, NULL for defaults
00139     return rc;
00140   }
00141 
00142   rc=pcre2_match((pcre2_code *) rpat,     // compiled regex pattern
00143                  (PCRE2_SPTR) str,        // subject of the search
00144                  len,                     // strlen of str
00145                  0,                       // match starting offset
00146                  0,                       // options
00147                  (pcre2_match_data *) md, // match data block
00148                  NULL);                   // match ctx, NULL for defaults
00149   return rc;
00150 #else
00151   if (rpat==NULL) {
00152 //  msgWarn << "JRegex::match: bad regex pattern, no match" << sendmsg;
00153     return -1;
00154   } 
00155   int retval;
00156   retval=vmdpcre_exec(rpat, // compiled regex pattern
00157                       NULL, // No extra study wisdom
00158                       str,  // subject of the search
00159                       len,  // strlen of str
00160                       0,    // offset at which to start finding substrings
00161                       0,    // options
00162                       NULL, // return vector for location of substrings
00163                       0);   // size of return vector
00164   return retval;
00165 #endif
00166 }
00167 
00168 int JRegex::search(const char *str, int len, int &length, int start) {
00169 #if defined(VMDUSEPCRE2)
00170   return -1; // not implemented 
00171 #else
00172   if (rpat==NULL) {
00173 //  msgWarn << "JRegex::search: bad regex pattern, no match" << sendmsg;
00174     return -1;
00175   } 
00176   int ovec[6], retval;
00177   retval=vmdpcre_exec(rpat, // my regex pattern
00178                       NULL, // No extra study wisdom
00179                       str,  // subject of the search
00180                       len,  // strlen of str
00181                       start,// offset at which to start finding substrings
00182                       0,    // options
00183                       ovec, // return vector for location of substrings
00184                       6);   // size of return vector
00185   if (retval < 0) 
00186     return retval;
00187   length = ovec[1]-ovec[0]; 
00188   return ovec[0]; 
00189 #endif
00190 }
00191 

Generated on Thu Mar 28 02:43:20 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002