/* * Copyright (C) 1996-2023 The Squid Software Foundation and contributors * * Squid software is distributed under GPLv2+ license and includes * contributions from numerous individuals and organizations. * Please see the COPYING and CONTRIBUTORS files for details. */ #include "squid.h" #include "base/CharacterSet.h" #include "parser/Tokenizer.h" #include "tests/testTokenizer.h" #include "unitTestMain.h" CPPUNIT_TEST_SUITE_REGISTRATION( testTokenizer ); SBuf text("GET http://resource.com/path HTTP/1.1\r\n" "Host: resource.com\r\n" "Cookie: laijkpk3422r j1noin \r\n" "\r\n"); const CharacterSet alpha("alpha","abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); const CharacterSet whitespace("whitespace"," \r\n"); const CharacterSet crlf("crlf","\r\n"); const CharacterSet tab("tab","\t"); const CharacterSet numbers("numbers","0123456789"); void testTokenizer::testTokenizerPrefix() { const SBuf canary("This text should not be changed."); Parser::Tokenizer t(text); SBuf s; CharacterSet all(whitespace); all += alpha; all += crlf; all += numbers; all.add(':').add('.').add('/'); // an empty prefix should return false (the full output buffer case) s = canary; const SBuf before = t.remaining(); CPPUNIT_ASSERT(!t.prefix(s, all, 0)); // ... and a false return value means no parameter changes CPPUNIT_ASSERT_EQUAL(canary, s); // ... and a false return value means no input buffer changes CPPUNIT_ASSERT_EQUAL(before, t.remaining()); // successful prefix tokenization CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); CPPUNIT_ASSERT(t.prefix(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); //no match (first char is not in the prefix set) CPPUNIT_ASSERT(!t.prefix(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf(" "),s); // one more match to set S to something meaningful CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //no match (no characters from the character set in the prefix) CPPUNIT_ASSERT(!t.prefix(s,tab)); CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //output SBuf left untouched // match until the end of the sample CPPUNIT_ASSERT(t.prefix(s,all)); CPPUNIT_ASSERT_EQUAL(SBuf(),t.remaining()); // empty prefix should return false (the empty input buffer case) s = canary; CPPUNIT_ASSERT(!t.prefix(s, all)); // ... and a false return value means no parameter changes CPPUNIT_ASSERT_EQUAL(canary, s); } void testTokenizer::testTokenizerSkip() { Parser::Tokenizer t(text); SBuf s; // first scenario: patterns match // prep for test CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); // test skipping one character from a character set CPPUNIT_ASSERT(t.skipOne(whitespace)); // check that skip was right CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("http"),s); //check skip prefix CPPUNIT_ASSERT(t.skip(SBuf("://"))); // verify CPPUNIT_ASSERT(t.prefix(s,alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("resource"),s); // no skip CPPUNIT_ASSERT(!t.skipOne(alpha)); CPPUNIT_ASSERT(!t.skip(SBuf("://"))); CPPUNIT_ASSERT(!t.skip('a')); // test skipping all characters from a character set while looking at .com CPPUNIT_ASSERT(t.skip('.')); CPPUNIT_ASSERT_EQUAL(static_cast(3), t.skipAll(alpha)); CPPUNIT_ASSERT(t.remaining().startsWith(SBuf("/path"))); } void testTokenizer::testTokenizerToken() { Parser::Tokenizer t(text); SBuf s; // first scenario: patterns match CPPUNIT_ASSERT(t.token(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf("GET"),s); CPPUNIT_ASSERT(t.token(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf("http://resource.com/path"),s); CPPUNIT_ASSERT(t.token(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf("HTTP/1.1"),s); CPPUNIT_ASSERT(t.token(s,whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf("Host:"),s); } void testTokenizer::testTokenizerSuffix() { const SBuf canary("This text should not be changed."); Parser::Tokenizer t(text); SBuf s; CharacterSet all(whitespace); all += alpha; all += crlf; all += numbers; all.add(':').add('.').add('/'); // an empty suffix should return false (the full output buffer case) s = canary; const SBuf before = t.remaining(); CPPUNIT_ASSERT(!t.suffix(s, all, 0)); // ... and a false return value means no parameter changes CPPUNIT_ASSERT_EQUAL(canary, s); // ... and a false return value means no input buffer changes CPPUNIT_ASSERT_EQUAL(before, t.remaining()); // consume suffix until the last CRLF, including that last CRLF SBuf::size_type remaining = t.remaining().length(); while (t.remaining().findLastOf(crlf) != SBuf::npos) { CPPUNIT_ASSERT(t.remaining().length() > 0); CPPUNIT_ASSERT(t.skipOneTrailing(all)); // ensure steady progress CPPUNIT_ASSERT_EQUAL(remaining, t.remaining().length() + 1); --remaining; } // no match (last char is not in the suffix set) CPPUNIT_ASSERT(!t.suffix(s, crlf)); CPPUNIT_ASSERT(!t.suffix(s, whitespace)); // successful suffix tokenization CPPUNIT_ASSERT(t.suffix(s, numbers)); CPPUNIT_ASSERT_EQUAL(SBuf("1"), s); CPPUNIT_ASSERT(t.skipSuffix(SBuf("1."))); CPPUNIT_ASSERT(t.skipSuffix(SBuf("/"))); CPPUNIT_ASSERT(t.suffix(s, alpha)); CPPUNIT_ASSERT_EQUAL(SBuf("HTTP"), s); CPPUNIT_ASSERT(t.suffix(s, whitespace)); CPPUNIT_ASSERT_EQUAL(SBuf(" "), s); // match until the end of the sample CPPUNIT_ASSERT(t.suffix(s, all)); CPPUNIT_ASSERT_EQUAL(SBuf(), t.remaining()); // an empty buffer does not end with a token s = canary; CPPUNIT_ASSERT(!t.suffix(s, all)); CPPUNIT_ASSERT_EQUAL(canary, s); // no parameter changes // we cannot skip an empty suffix, even in an empty buffer CPPUNIT_ASSERT(!t.skipSuffix(SBuf())); } void testTokenizer::testCharacterSet() { } void testTokenizer::testTokenizerInt64() { // successful parse in base 10 { int64_t rv; Parser::Tokenizer t(SBuf("1234")); const int64_t benchmark = 1234; CPPUNIT_ASSERT(t.int64(rv, 10)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT(t.buf().isEmpty()); } // successful parse, autodetect base { int64_t rv; Parser::Tokenizer t(SBuf("1234")); const int64_t benchmark = 1234; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT(t.buf().isEmpty()); } // successful parse, autodetect base { int64_t rv; Parser::Tokenizer t(SBuf("01234")); const int64_t benchmark = 01234; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT(t.buf().isEmpty()); } // successful parse, autodetect base { int64_t rv; Parser::Tokenizer t(SBuf("0x12f4")); const int64_t benchmark = 0x12f4; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT(t.buf().isEmpty()); } // API mismatch: don't eat leading space { int64_t rv; Parser::Tokenizer t(SBuf(" 1234")); CPPUNIT_ASSERT(!t.int64(rv)); CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf()); } // API mismatch: don't eat multiple leading spaces { int64_t rv; Parser::Tokenizer t(SBuf(" 1234")); CPPUNIT_ASSERT(!t.int64(rv)); CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf()); } // trailing spaces { int64_t rv; Parser::Tokenizer t(SBuf("1234 foo")); const int64_t benchmark = 1234; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT_EQUAL(SBuf(" foo"), t.buf()); } // trailing nonspaces { int64_t rv; Parser::Tokenizer t(SBuf("1234foo")); const int64_t benchmark = 1234; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT_EQUAL(SBuf("foo"), t.buf()); } // trailing nonspaces { int64_t rv; Parser::Tokenizer t(SBuf("0x1234foo")); const int64_t benchmark = 0x1234f; CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT_EQUAL(SBuf("oo"), t.buf()); } // overflow { int64_t rv; Parser::Tokenizer t(SBuf("1029397752385698678762234")); CPPUNIT_ASSERT(!t.int64(rv)); CPPUNIT_ASSERT_EQUAL(SBuf("1029397752385698678762234"), t.buf()); } // buffered sub-string parsing { int64_t rv; SBuf base("1029397752385698678762234"); const int64_t benchmark = 22; Parser::Tokenizer t(base.substr(base.length()-4,2)); CPPUNIT_ASSERT_EQUAL(SBuf("22"),t.buf()); CPPUNIT_ASSERT(t.int64(rv)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT(t.buf().isEmpty()); } // base-16, prefix { int64_t rv; SBuf base("deadbeefrow"); const int64_t benchmark=0xdeadbeef; Parser::Tokenizer t(base); CPPUNIT_ASSERT(t.int64(rv,16)); CPPUNIT_ASSERT_EQUAL(benchmark,rv); CPPUNIT_ASSERT_EQUAL(SBuf("row"),t.buf()); } }