#! stap -p5 global n global pass, fail @define check (code, regexp, str) %( result = (@str =~ @regexp); n++; if (result == !@code) { printf("regex PASS: #%d: %s %s %s\n", n, @regexp, (@code ? "!~" : "=~"), @str); pass++ } else { printf("regex FAIL: #%d: %s %s %s\n", n, @regexp, (@code ? "!~" : "=~"), @str); fail++ } %) probe begin { /* from former systemtap.base/regexp.exp */ @check(0, "\\\\", "\\") @check(0, "abc", "xabcy") @check(0, "ab*bc", "abbbbc") @check(0, "ab?bc", "abbc") @check(1, "^abc$", "abcc") @check(1, "a[b-d]e", "abd") @check(0, "a[b-d]e", "ace") @check(0, "a\\(*b", "ab") @check(0, "a\\(*b", "a((b") @check(0, "(a+|b)*", "ab") @check(0, "(a+|b)+", "ab") @check(0, "([abc])*d", "abbbcd") @check(1, "^(ab|cd)e", "abcde") @check(0, "[A-Za-z_][A-Za-z0-9_]*", "alpha") @check(0, "(bc+d$|ef*g.|h?i(j|k))", "ij") @check(1, "(bc+d$|ef*g.|h?i(j|k))", "effg") @check(0, "(((((((((a)))))))))", "a") @check(1, "\\((.*),", "(.*)\\)") @check(1, "[k]", "ab") @check(0, "abcd", "abcd") @check(0, "a(bc)d", "abcd") /* from former re2c-migrate/regcomp.base/regtest.in.0 */ # 0 means match should succeed: @check(0, "", "") @check(0, "s*", "ssss") # 1 means match should fail: @check(1, "abc", "zb") @check(1, "abc", "ab") # 2 means regex compilation should fail (cleanly): [[unsupported]] # @check(2,"[","abc") # initial tests when trying to figure out left anchoring @check(0,"","") @check(0,"","abc") @check(0,"^abc","abc") @check(0,"^","abc") @check(1,"a^","abc") @check(1,"^abc","zabc") @check(1,"ab^c","abc") @check(0,"a*^b","b") # matching should not be left-anchored by default @check(0,"abc","zabc") # indefinite repetition should not fail via reading past the end @check(0,".*","stuffs") # test cases for PR15063 (dealing with ^$ subtleties) @check(1,"$^","") @check(0,"$.*","") @check(1,"$a^","a") @check(0,"$.*","a") @check(1,"$.+","a") @check(0,".*^","") @check(0,".*^","a") @check(1,".+^","a") # according to the ERE standard (http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html#tag_09_04_09) the following should NEVER match @check(1,"a^b","") @check(1,"a^b","ab") @check(1,"a^b","a") @check(1,"e$f","") @check(1,"e$f","ef") @check(1,"e$f","e") # attempts to pinpoint chr class errors @check(0,"[^ab]","c") @check(0,"[^ab]*","") @check(0,"[ab]*","ababab") # be sure overflow behaves nicely # @check(2,"a{129490281098409821098209381020972091420947092174092717092}","aaa") # attempts to to pinpoint FAIL: regtest 0:^a(bc+|b[eh])g|.h$:abh @check(0,"h$","abh") @check(0,".h$","abh") @check(0,"a|.h$","abh") @check(0,"^a|.h$","abh") @check(0,"^ag|.h$","abh") @check(0,"^a(bc+)g|.h$","abh") @check(0,"a(bc+|b[eh])g|.h$","abh") @check(0,"^a(bc)g|.h","abh") @check(0,"^a(bc+)g|.h","abh") @check(0,"^(a)|.h$","abh") @check(0,"^ag|.h$","agh") @check(0,"^a(bc)g|.h$","abh") @check(0,"^a(b[eh])g|.h$","abh") @check(0,"^a(bc+|b[eh])g|.h$","abh") @check(0,"^a(bc|be)g|.h$","abh") @check(0,"^a(bc+|b[eh])g|h$","abh") @check(0,"^|$","") @check(0,"a|$","") @check(0,"^a|$","") @check(0,"^a|h$","h") @check(0,"^ag|h$","h") @check(0,"^acg|.h$","ah") @check(0,"^ab|.h$","ah") @check(0,"^acg|.h$","abh") # PR15064 -- additional (named-)chr-class tests # extra character classes, using re2c's octals # TODOXXX correct matching for cntrl @check(1,"^[[:cntrl:]]$","q") @check(0,"^[[:graph:]]+$","excibitobabble") @check(1,"^[[:graph:]]+$","many works many words") # TODOXXX incorrect matching for graph @check(0,"^[[:print:]]+$","excibitobabble") @check(0,"^[[:print:]]+$","many works many words") # TODOXXX incorrect matching for print # quoting various symbols, especially the [: :] brackets @check(0,"[\\^a]","a") @check(1,"[\\^a]","b") @check(0,"[\\[:alpha:]]",":]") @check(1,"[\\[:alpha:]]",":") @check(0,"[\\[:alpha:]",":") @check(0,"[\\[\\:alpha:]",":") # @check(2,"[[:alpha\\:]]","a") /* from former re2c-migrate/regcomp.base/regtest.in.1 */ ################################## # testsuite courtesy of glibc 2.17 @check(0,"^","") @check(0,"$","") @check(0,"^$","") @check(0,"^a$","a") @check(0,"abc","abc") @check(1,"abc","xbc") @check(1,"abc","axc") @check(1,"abc","abx") @check(0,"abc","xabcy") @check(0,"abc","ababc") @check(0,"ab*c","abc") @check(0,"ab*bc","abc") @check(0,"ab*bc","abbc") @check(0,"ab*bc","abbbbc") @check(0,"ab+bc","abbc") @check(1,"ab+bc","abc") @check(1,"ab+bc","abq") @check(0,"ab+bc","abbbbc") @check(0,"ab?bc","abbc") @check(0,"ab?bc","abc") @check(1,"ab?bc","abbbbc") @check(0,"ab?c","abc") @check(0,"^abc$","abc") @check(1,"^abc$","abcc") @check(0,"^abc","abcc") @check(1,"^abc$","aabc") @check(0,"abc$","aabc") @check(0,"^","abc") @check(0,"$","abc") @check(0,"a.c","abc") @check(0,"a.c","axc") @check(0,"a.*c","axyzc") @check(1,"a.*c","axyzd") @check(1,"a[bc]d","abc") @check(0,"a[bc]d","abd") @check(1,"a[b-d]e","abd") @check(0,"a[b-d]e","ace") @check(0,"a[b-d]","aac") @check(0,"a[-b]","a-") @check(0,"a[b-]","a-") # @check(2,"a[b-a]","-") # @check(2,"a[]b","-") # @check(2,"a[","-") @check(0,"a]","a]") @check(0,"a[]]b","a]b") @check(0,"a[^bc]d","aed") @check(1,"a[^bc]d","abd") @check(0,"a[^-b]c","adc") @check(1,"a[^-b]c","a-c") @check(1,"a[^]b]c","a]c") @check(0,"a[^]b]c","adc") @check(0,"ab|cd","abc") @check(0,"ab|cd","abcd") @check(0,"()ef","def") @check(0,"()*","-") # @check(2,"*a","-") # @check(2,"^*","-") # @check(2,"$*","-") # @check(2,"(*)b","-") @check(1,"$b","b") # @check(2,"a\"","-") @check(0,"a\\(b","a(b") @check(0,"a\\(*b","ab") @check(0,"a\\(*b","a((b") @check(1,"a\\x","a\\x") @check(1,"abc)","-") # @check(2,"(abc","-") @check(0,"((a))","abc") @check(0,"(a)b(c)","abc") @check(0,"a+b+c","aabbabc") @check(0,"a**","-") @check(0,"a*?","-") @check(0,"(a*)*","-") @check(0,"(a*)+","-") @check(0,"(a|)*","-") @check(0,"(a*|b)*","-") @check(0,"(a+|b)*","ab") @check(0,"(a+|b)+","ab") @check(0,"(a+|b)?","ab") @check(0,"[^ab]*","cde") @check(0,"(^)*","-") @check(0,"(ab|)*","-") # @check(2,")(","-") @check(1,"abc","") @check(1,"abc","") @check(0,"a*","") @check(0,"([abc])*d","abbbcd") @check(0,"([abc])*bcd","abcd") @check(0,"a|b|c|d|e","e") @check(0,"(a|b|c|d|e)f","ef") @check(0,"((a*|b))*","-") @check(0,"abcd*efg","abcdefg") @check(0,"ab*","xabyabbbz") @check(0,"ab*","xayabbbz") @check(0,"(ab|cd)e","abcde") @check(0,"[abhgefdc]ij","hij") @check(1,"^(ab|cd)e","abcde") @check(0,"(abc|)ef","abcdef") @check(0,"(a|b)c*d","abcd") @check(0,"(ab|ab*)bc","abc") @check(0,"a([bc]*)c*","abc") @check(0,"a([bc]*)(c*d)","abcd") @check(0,"a([bc]+)(c*d)","abcd") @check(0,"a([bc]*)(c+d)","abcd") @check(0,"a[bcd]*dcdcde","adcdcde") @check(1,"a[bcd]+dcdcde","adcdcde") @check(0,"(ab|a)b*c","abc") @check(0,"((a)(b)c)(d)","abcd") @check(0,"[A-Za-z_][A-Za-z0-9_]*","alpha") @check(0,"^a(bc+|b[eh])g|.h$","abh") @check(0,"(bc+d$|ef*g.|h?i(j|k))","effgz") @check(0,"(bc+d$|ef*g.|h?i(j|k))","ij") @check(1,"(bc+d$|ef*g.|h?i(j|k))","effg") @check(1,"(bc+d$|ef*g.|h?i(j|k))","bcdd") @check(0,"(bc+d$|ef*g.|h?i(j|k))","reffgz") @check(1,"((((((((((a))))))))))","-") @check(0,"(((((((((a)))))))))","a") @check(1,"multiple words of text","uh-uh") @check(0,"multiple words","multiple words, yeah") @check(0,"(.*)c(.*)","abcde") @check(1,"\\((.*),","(.*)\\)") @check(1,"[k]","ab") @check(0,"abcd","abcd") @check(0,"a(bc)d","abcd") @check(0,"a[-]?c","ac") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Qaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mo'ammar Gadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Kaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Qadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moammar El Kadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Gadafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar al-Qadafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moamer El Kazzafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moamar al-Gaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar Al Qathafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Al Qathafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mo'ammar el-Gadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moamar El Kadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar al-Qadhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar al-Qadhdhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar Qadafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moamar Gaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar Qadhdhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Khaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar al-Khaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'amar al-Kadafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Ghaddafy") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Ghadafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Ghaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muamar Kaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Quathafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muammar Gheddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Muamar Al-Kaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moammar Khadafy ") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Moammar Qudhafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mu'ammar al-Qaddafi") @check(0,"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]","Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi") # PR15064 -- character classes # NB: we escape : as \: to allow field splitting @check(0,"[[:digit:]]+","01234") @check(1,"[[:alpha:]]+","01234") @check(0,"^[[:digit:]]*$","01234") @check(1,"^[[:digit:]]*$","01234a") @check(0,"^[[:alnum:]]*$","01234a") @check(0,"^[[:xdigit:]]*$","01234a") @check(1,"^[[:xdigit:]]*$","01234g") @check(0,"^[[:alnum:][:space:]]*$","Hello world") # XXX: subexpression reuse not supported and probably won't be # @check(0,"(.*)*\1","xx") # @check(0,"(....).*\1","beriberi") # PR23608 -- rebalance priority for longer regex @check(1, "/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.181-3.b13.el7_5.x86_64/", "stuff") @check(1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "stuff") @check(0, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") # and last but not least: @check(0, "du+de", "duuuuuuuuude") @check(0, "d([ou]|hu|hou)+de(tte)?s?", "duuuuuuuuude") exit() } probe end { printf ("\nregex total PASS: %d, FAIL: %d\n", pass, fail) if (fail > 0) error ("Oops") }