Skip to content
Prev Previous commit
Next Next commit
Add RE2 regex backend
  • Loading branch information
WGH- committed Sep 5, 2020
commit 70a6833b7531c89a6f6ef39c25dd86ff827d5829
3 changes: 2 additions & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ UTILS = \

REGEX = \
regex/regex.cc \
regex/backend/pcre.cc
regex/backend/pcre.cc \
regex/backend/re2.cc


COLLECTION = \
Expand Down
115 changes: 115 additions & 0 deletions src/regex/backend/re2.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2019
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/
#include <iostream>
#include <fstream>
#include <string>
#include <list>
#include <vector>

#include "src/regex/backend/re2.h"
#include "src/regex/regex_match.h"

namespace modsecurity {
namespace regex {
namespace backend {

#ifdef WITH_RE2

// Builds the option set shared by every Re2 instance in this backend.
// The only non-default setting is dot_nl, which lets '.' match a newline too.
static RE2::Options get_re2_options() {
    RE2::Options options;
    options.set_dot_nl(true);
    return options;
}


// Stores the pattern and compiles it with the backend's RE2 options.
// An empty pattern is replaced by ".*". `pattern` is initialised before `re`
// (declaration order in the class), so `re` is built from the stored copy.
Re2::Re2(const std::string& pattern_)
    : pattern(pattern_.empty() ? std::string(".*") : pattern_),
      re(pattern, get_re2_options()) {
}

std::list<RegexMatch> Re2::searchAll(const std::string& s) const {
std::list<RegexMatch> retList;

re2::StringPiece subject(s);

size_t offset = 0;
while (offset <= s.size()) {
int ngroups = re.NumberOfCapturingGroups() + 1;
re2::StringPiece submatches[ngroups];

if (!re.Match(subject, offset, s.size(), RE2::UNANCHORED,
&submatches[0], ngroups)) {
break;
}

for (int i = 0; i < ngroups; i++) {
// N.B. StringPiece::as_string returns value, not reference
auto match_string = submatches[i].as_string();
auto start = &submatches[i][0] - &subject[0];
retList.push_front(RegexMatch(std::move(match_string), start));
}

offset = (&submatches[0][0] - &subject[0]) + submatches[0].length();
if (submatches[0].size() == 0) {
offset++;
}
}

return retList;
}

bool Re2::searchOneMatch(const std::string& s, std::vector<RegexMatchCapture>& captures) const {
re2::StringPiece subject(s);
int ngroups = re.NumberOfCapturingGroups() + 1;
re2::StringPiece submatches[ngroups];

if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], ngroups)) {
for (int i = 0; i < ngroups; i++) {
auto len = submatches[i].length();
auto start = len != 0 ? &submatches[i][0] - &subject[0] : 0;
captures.push_back(RegexMatchCapture(i, start, len));
}
return true;
} else {
return false;
}
}

int Re2::search(const std::string& s, RegexMatch *match) const {
re2::StringPiece subject(s);
re2::StringPiece submatches[1];
if (re.Match(subject, 0, s.size(), RE2::UNANCHORED, &submatches[0], 1)) {
// N.B. StringPiece::as_string returns value, not reference
auto match_string = submatches[0].as_string();
auto start = &submatches[0][0] - &subject[0];
*match = RegexMatch(std::move(match_string), start);
return 1;
} else {
return 0;
}
}

int Re2::search(const std::string& s) const {
re2::StringPiece subject(s);
return re.Match(subject, 0, s.size(), RE2::UNANCHORED, NULL, 0);
}
#endif

} // namespace backend
} // namespace regex
} // namespace modsecurity

59 changes: 59 additions & 0 deletions src/regex/backend/re2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* ModSecurity, http://www.modsecurity.org/
* Copyright (c) 2019
*
* You may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*
*/

#ifdef WITH_RE2
#include <re2/re2.h>
#endif

#include <string>
#include <vector>
#include <list>

#include "src/regex/regex_match.h"

#ifndef SRC_REGEX_BACKEND_RE2_H_
#define SRC_REGEX_BACKEND_RE2_H_

namespace modsecurity {
namespace regex {
namespace backend {

#ifdef WITH_RE2

// Regex backend implemented on top of the RE2 library.
// Both data members are const and every search method is const, so an
// instance is not modified after construction.
class Re2 {
public:
// Builds the matcher from `pattern_`; the constructor substitutes ".*" for
// an empty pattern.
explicit Re2(const std::string& pattern_);

// RE2 class is not copyable, so neither is this
Re2(const Re2&) = delete;
Re2& operator=(const Re2&) = delete;

// Returns the matches found while scanning `s` repeatedly; for each match
// the whole match and every capturing group are reported.
std::list<RegexMatch> searchAll(const std::string& s) const;
// Runs a single search over `s`; on success appends one capture per group
// (group 0 first) to `captures` and returns true, otherwise returns false.
bool searchOneMatch(const std::string& s, std::vector<RegexMatchCapture>& captures) const;
// Runs a single search over `s`; on success stores the whole match in `*m`
// and returns 1, otherwise returns 0.
int search(const std::string &s, RegexMatch *m) const;
// Runs a single search over `s` without extracting submatches; the result
// of the match call is returned as an int (non-zero on a match).
int search(const std::string &s) const;

// Pattern text the matcher was built from. Declared before `re` because the
// constructor initialises `re` from this member.
const std::string pattern;
private:
// Compiled RE2 object used by all search methods.
const RE2 re;
};

#endif

} // namespace backend
} // namespace regex
} // namespace modsecurity

#endif // SRC_REGEX_BACKEND_RE2_H_
3 changes: 2 additions & 1 deletion src/regex/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <vector>

#include "src/regex/backend/pcre.h"
#include "src/regex/backend/re2.h"
#include "src/regex/regex_match.h"

#ifndef SRC_REGEX_REGEX_H_
Expand All @@ -34,7 +35,7 @@ namespace regex {
#ifdef WITH_PCRE
using selectedBackend = backend::Pcre;
#elif WITH_RE2
//using selectedBackend = backend::Re2;
using selectedBackend = backend::Re2;
#else
#error "no regex backend selected"
#endif
Expand Down