You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

225 lines
6.5 KiB
C++

/*/ problem source:
* https://www.geeksforgeeks.org/find-the-longest-substring-with-k-unique-characters-in-a-given-string/
Possible references include:
https://stackoverflow.com/a/7304184 - custom delimeter for istream
*/
#include "lib_random.h"
#include <cassert>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
struct semicolon_is_space : std::ctype<char> {
// this struct/class is adapted from: https://stackoverflow.com/a/7304184
semicolon_is_space() : std::ctype<char>(get_table()){};
static mask const *get_table() {
static mask rc[table_size];
rc[(int)';'] = std::ctype_base::space;
rc[(int)'\n'] = std::ctype_base::space;
return &rc[0];
};
};
class input {
public:
std::string s = "";
unsigned long k = 1;
friend std::istream &operator>>(std::istream &in, input &i);
friend std::ostream &operator<<(std::ostream &out, const input &i);
};
std::istream &operator>>(std::istream &in, input &i) {
in >> i.s >> i.k;
return in;
}
std::ostream &operator<<(std::ostream &out, const input &i) {
out << "String:\n" << i.s << "\nK: " << i.k << std::endl;
return out;
}
class result {
public:
unsigned long longest_start = 0;
unsigned long longest_end = 0;
bool match_found = false;
bool operator==(const result r) const;
friend std::istream &operator>>(std::istream &in, result &r);
friend std::ostream &operator<<(std::ostream &out, const result &r);
};
bool result::operator==(const result r) const {
return longest_start == r.longest_start && longest_end == r.longest_end &&
match_found == r.match_found;
}
std::istream &operator>>(std::istream &in, result &r) {
in >> r.longest_start >> r.longest_end >> r.match_found;
return in;
}
std::ostream &operator<<(std::ostream &out, const result &r) {
out << "longest_start: " << r.longest_start
<< " longest_end: " << r.longest_end << " match_found: ";
if (r.match_found)
out << "true";
else
out << "false";
out << std::endl;
return out;
}
class test_case {
public:
input i;
result r;
friend std::istream &operator>>(std::istream &in, test_case &t);
friend std::ostream &operator<<(std::ostream &out, const test_case &t);
};
std::istream &operator>>(std::istream &in, test_case &t) {
in >> t.i >> t.r;
return in;
}
std::ostream &operator<<(std::ostream &out, const test_case &t) {
out << t.i << t.r;
return out;
}
result find(const std::string &s, const unsigned long k) {
result r;
std::unordered_map<char, unsigned long> char_count;
unsigned long start = 0, end = 0;
for (; end < s.length(); end++) {
char_count[s[end]] += 1;
while (char_count.size() > k) {
char_count[s[start]]--;
if (char_count[s[start]] == 0) {
char_count.erase(s[start]);
}
start++;
}
if (char_count.size() == k &&
(end - start > r.longest_end - r.longest_start ||
r.match_found == false)) {
r.longest_start = start, r.longest_end = end;
r.match_found = true;
}
}
if (r.match_found) {
std::cout << "Longest substring is \""
<< s.substr(r.longest_start, r.longest_end - r.longest_start + 1)
<< "\" with length " << r.longest_end - r.longest_start + 1
<< ".\n";
} else {
std::cout << "Could not find any match, not enough unique characters.\n";
}
return r;
}
void test(const test_case &t) {
std::cout << t;
if (find(t.i.s, t.i.k) == t.r) {
std::cout << "Test case with string \"" << t.i.s << "\" and k=" << t.i.k
<< " passed.\n";
} else {
std::cout << std::flush;
std::cerr << "TEST CASE WITH STRING \"" << t.i.s << "\" AND k=" << t.i.k
<< " FAILED.\n";
}
std::cout << "\n";
}
bool K_verify(const test_case &t) {
// Verifies if found solution has exactly K unique characters (one of the
// problem requirement)
if (!t.r.match_found)
return true; // Do not check if no match found
std::unordered_set<char> unique_chars;
for (const char c : t.i.s.substr(t.r.longest_start,
t.r.longest_end - t.r.longest_start + 1)) {
unique_chars.insert(c);
}
return unique_chars.size() == t.i.k;
}
bool thorough_test(const test_case &t) {
unsigned long org_window_size = t.r.longest_end - t.r.longest_start + 1;
if (org_window_size >= t.i.s.length())
return true;
for (unsigned long i = 0, n_tests = t.i.s.length() - org_window_size;
i < n_tests; i++) {
result r = {i, i + org_window_size, true};
test_case mock_test = {t.i, r};
if (K_verify(mock_test))
return false;
}
return true;
}
int main(int argc, char *argv[]) {
std::cout << "Processing static test cases from input file (if any)\n";
semicolon_is_space delimeter;
for (int i = 1; i < argc; i++) {
std::ifstream f(argv[i]);
f.imbue(std::locale(f.getloc(), new semicolon_is_space));
while (f.good()) {
test_case t;
f >> t;
if (t.i.s.length()) { // skip empty line/string inputs
test(t);
}
}
}
// Thorough test (100% verifiable correct) is performaned randomly in ~1%
// cases
std::cout << "\n\n\nPerforming metamorphic tests from randomly generated "
"input (string and K values).\nThorough test (100\% verifiable "
"correct) is performaned randomly in ~1\% cases."
<< std::endl;
random_number_generator<unsigned int> random_ascii_char(97, 122); // a-z
random_number_generator<unsigned int> rng;
for (unsigned int i = 0; i < 10; i++) {
test_case t;
t.i.s += (char)random_ascii_char();
t.r = find(t.i.s, t.i.k);
std::cout << t;
for (unsigned int j = 0; j < 10; j++) {
for (unsigned int k = 0, increase_str_len_by = rng(100, 1000);
k < increase_str_len_by; k++) {
t.i.s += (char)random_ascii_char();
result r = find(t.i.s, t.i.k);
if (r.match_found && t.r.match_found)
assert(r.longest_end - r.longest_start >=
t.r.longest_end - t.r.longest_start);
t.r = r;
std::cout << t;
assert(K_verify(t));
if (!rng(0, 99))
assert(thorough_test(t));
}
while (t.r.match_found) {
result r = find(t.i.s, ++t.i.k);
if (r.match_found)
assert(r.longest_end - r.longest_start >=
t.r.longest_end - t.r.longest_start);
t.r = r;
std::cout << t;
assert(K_verify(t));
if (!rng(0, 99))
assert(thorough_test(t));
}
t.i.k = rng(1, --t.i.k);
}
}
return 0;
}