2023-02-22 15:03:22 +01:00
|
|
|
/*/ problem source:
|
|
|
|
* https://www.geeksforgeeks.org/find-the-longest-substring-with-k-unique-characters-in-a-given-string/
|
2023-02-23 09:28:02 +01:00
|
|
|
|
|
|
|
Possible references include:
|
|
|
|
https://stackoverflow.com/a/7304184 - custom delimiter for istream
|
2023-02-22 15:03:22 +01:00
|
|
|
*/
|
2023-02-25 02:11:38 +01:00
|
|
|
#include "lib_random.h"
|
|
|
|
#include <cassert>
|
2023-02-23 09:28:02 +01:00
|
|
|
#include <fstream>
|
2023-02-22 15:03:22 +01:00
|
|
|
#include <iostream>
|
2023-02-23 09:28:02 +01:00
|
|
|
#include <locale>
|
|
|
|
#include <sstream>
|
|
|
|
#include <string>
|
2023-02-22 15:03:22 +01:00
|
|
|
#include <unordered_map>
|
2023-02-25 02:11:38 +01:00
|
|
|
#include <unordered_set>
|
2023-02-22 15:03:22 +01:00
|
|
|
#include <vector>
|
|
|
|
|
2023-02-23 09:28:02 +01:00
|
|
|
/// Character-classification facet in which only ';' and '\n' count as
/// whitespace. A stream imbued with it splits formatted input on those two
/// characters; every other character (including a blank) has no class set.
/// Adapted from: https://stackoverflow.com/a/7304184
struct semicolon_is_space : std::ctype<char> {
  semicolon_is_space() : std::ctype<char>(get_table()) {}

  /// Returns the classification table handed to the std::ctype<char> base.
  static const mask *get_table() {
    // Static storage duration: the table starts zero-initialised, so only
    // the two entries assigned below carry a classification.
    static mask table[table_size];
    table[static_cast<unsigned char>(';')] = std::ctype_base::space;
    table[static_cast<unsigned char>('\n')] = std::ctype_base::space;
    return table;
  }
};
|
2023-02-24 08:21:14 +01:00
|
|
|
|
2023-02-23 09:28:02 +01:00
|
|
|
/// One problem instance: the text to search and the number of unique
/// characters the substring must contain.
class input {
public:
  std::string s{};    // text to search; empty by default
  unsigned long k{1}; // required number of unique characters

  // Stream helpers; they are defined out of line.
  friend std::ostream &operator<<(std::ostream &out, const input &i);
  friend std::istream &operator>>(std::istream &in, input &i);
};
|
|
|
|
|
|
|
|
std::istream &operator>>(std::istream &in, input &i) {
|
2023-02-24 08:21:14 +01:00
|
|
|
in >> i.s >> i.k;
|
2023-02-23 09:28:02 +01:00
|
|
|
return in;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Writes `i` as a "String:" line followed by a "K: " line, then flushes
/// the stream via std::endl.
std::ostream &operator<<(std::ostream &out, const input &i) {
  out << "String:\n" << i.s;
  out << "\nK: " << i.k;
  out << std::endl;
  return out;
}
|
|
|
|
|
2023-02-22 15:03:22 +01:00
|
|
|
/// Outcome of a search: the boundaries of the longest matching substring and
/// whether any match exists at all.
class result {
public:
  unsigned long longest_start{0}; // index of the first character of the match
  unsigned long longest_end{0};   // index of the last character (inclusive)
  bool match_found{false};        // false when no substring qualified

  /// Member-wise equality; defined out of line.
  bool operator==(const result r) const;

  // Stream helpers; they are defined out of line.
  friend std::ostream &operator<<(std::ostream &out, const result &r);
  friend std::istream &operator>>(std::istream &in, result &r);
};
|
|
|
|
|
2023-02-26 00:11:06 +01:00
|
|
|
/// Two results are equal when both boundaries and the match flag agree.
bool result::operator==(const result r) const {
  if (longest_start != r.longest_start)
    return false;
  if (longest_end != r.longest_end)
    return false;
  return match_found == r.match_found;
}
|
|
|
|
|
|
|
|
/// Extracts a `result` from `in` in the order: start index, end index,
/// match flag. The flag is parsed with the stream's current bool formatting.
std::istream &operator>>(std::istream &in, result &r) {
  in >> r.longest_start;
  in >> r.longest_end;
  in >> r.match_found;
  return in;
}
|
|
|
|
|
|
|
|
std::ostream &operator<<(std::ostream &out, const result &r) {
|
|
|
|
out << "longest_start: " << r.longest_start
|
|
|
|
<< " longest_end: " << r.longest_end << " match_found: ";
|
|
|
|
if (r.match_found)
|
|
|
|
out << "true";
|
|
|
|
else
|
|
|
|
out << "false";
|
|
|
|
out << std::endl;
|
|
|
|
return out;
|
|
|
|
}
|
2023-02-22 15:03:22 +01:00
|
|
|
|
2023-02-23 09:28:02 +01:00
|
|
|
class test_case {
|
|
|
|
public:
|
2023-02-22 15:03:22 +01:00
|
|
|
input i;
|
|
|
|
result r;
|
2023-02-23 09:28:02 +01:00
|
|
|
friend std::istream &operator>>(std::istream &in, test_case &t);
|
|
|
|
friend std::ostream &operator<<(std::ostream &out, const test_case &t);
|
2023-02-22 15:03:22 +01:00
|
|
|
};
|
|
|
|
|
2023-02-23 09:28:02 +01:00
|
|
|
/// Extracts a test case from `in`: the input first, then its result.
std::istream &operator>>(std::istream &in, test_case &t) {
  return in >> t.i >> t.r;
}
|
|
|
|
|
|
|
|
/// Writes the input followed by the result, each in its own format.
std::ostream &operator<<(std::ostream &out, const test_case &t) {
  return out << t.i << t.r;
}
|
|
|
|
|
2023-02-22 15:03:22 +01:00
|
|
|
/// Finds the longest substring of `s` that contains exactly `k` unique
/// characters, using a sliding window over the string.
///
/// @param s text to search
/// @param k required number of unique characters
/// @return inclusive boundaries of the first longest match, with
///         `match_found` set; `match_found` stays false when no window
///         ever holds exactly `k` unique characters.
///
/// Also prints a one-line summary of the outcome to std::cout.
result find(const std::string &s, const unsigned long k) {
  result best;
  // Occurrence count of every character inside the current window.
  std::unordered_map<char, unsigned long> window_counts;
  unsigned long left = 0;

  for (unsigned long right = 0; right < s.length(); ++right) {
    ++window_counts[s[right]];

    // Too many unique characters: drop characters from the left edge until
    // the window is back within the limit.
    while (window_counts.size() > k) {
      const auto leftmost = window_counts.find(s[left]);
      if (--leftmost->second == 0) {
        window_counts.erase(leftmost);
      }
      ++left;
    }

    if (window_counts.size() != k) {
      continue;
    }
    // Strictly-greater comparison keeps the earliest of equally long matches.
    if (!best.match_found ||
        right - left > best.longest_end - best.longest_start) {
      best.longest_start = left;
      best.longest_end = right;
      best.match_found = true;
    }
  }

  if (!best.match_found) {
    std::cout << "Could not find any match, not enough unique characters.\n";
    return best;
  }

  const unsigned long match_length = best.longest_end - best.longest_start + 1;
  std::cout << "Longest substring is \""
            << s.substr(best.longest_start, match_length) << "\" with length "
            << match_length << ".\n";
  return best;
}
|
|
|
|
|
2023-02-25 02:11:38 +01:00
|
|
|
void test(const test_case &t) {
|
|
|
|
std::cout << t;
|
|
|
|
if (find(t.i.s, t.i.k) == t.r) {
|
|
|
|
std::cout << "Test case with string \"" << t.i.s << "\" and k=" << t.i.k
|
|
|
|
<< " passed.\n";
|
|
|
|
} else {
|
|
|
|
std::cout << std::flush;
|
|
|
|
std::cerr << "TEST CASE WITH STRING \"" << t.i.s << "\" AND k=" << t.i.k
|
|
|
|
<< " FAILED.\n";
|
|
|
|
}
|
|
|
|
std::cout << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
bool K_verify(const test_case &t) {
|
|
|
|
// Verifies if found solution has exactly K unique characters (one of the
|
|
|
|
// problem requirement)
|
|
|
|
if (!t.r.match_found)
|
|
|
|
return true; // Do not check if no match found
|
|
|
|
std::unordered_set<char> unique_chars;
|
|
|
|
for (const char c : t.i.s.substr(t.r.longest_start,
|
|
|
|
t.r.longest_end - t.r.longest_start + 1)) {
|
|
|
|
unique_chars.insert(c);
|
|
|
|
}
|
|
|
|
return unique_chars.size() == t.i.k;
|
|
|
|
}
|
|
|
|
|
2023-02-26 00:11:06 +01:00
|
|
|
bool thorough_test(const test_case &t) {
|
|
|
|
unsigned long org_window_size = t.r.longest_end - t.r.longest_start + 1;
|
|
|
|
if (org_window_size >= t.i.s.length())
|
|
|
|
return true;
|
|
|
|
for (unsigned long i = 0, n_tests = t.i.s.length() - org_window_size;
|
|
|
|
i < n_tests; i++) {
|
|
|
|
result r = {i, i + org_window_size, true};
|
|
|
|
test_case mock_test = {t.i, r};
|
|
|
|
if (K_verify(mock_test))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2023-02-23 09:28:02 +01:00
|
|
|
int main(int argc, char *argv[]) {
|
2023-02-25 02:11:38 +01:00
|
|
|
std::cout << "Processing static test cases from input file (if any)\n";
|
2023-02-23 09:28:02 +01:00
|
|
|
semicolon_is_space delimeter;
|
|
|
|
for (int i = 1; i < argc; i++) {
|
|
|
|
std::ifstream f(argv[i]);
|
|
|
|
f.imbue(std::locale(f.getloc(), new semicolon_is_space));
|
|
|
|
while (f.good()) {
|
2023-02-24 08:21:14 +01:00
|
|
|
test_case t;
|
2023-02-23 09:28:02 +01:00
|
|
|
f >> t;
|
|
|
|
if (t.i.s.length()) { // skip empty line/string inputs
|
2023-02-25 02:11:38 +01:00
|
|
|
test(t);
|
|
|
|
}
|
|
|
|
}
|
2023-02-26 00:11:06 +01:00
|
|
|
}
|
2023-02-25 02:11:38 +01:00
|
|
|
|
2023-02-26 00:11:06 +01:00
|
|
|
// Thorough test (100% verifiable correct) is performaned randomly in ~1%
|
|
|
|
// cases
|
|
|
|
std::cout << "\n\n\nPerforming metamorphic tests from randomly generated "
|
|
|
|
"input (string and K values).\nThorough test (100\% verifiable "
|
|
|
|
"correct) is performaned randomly in ~1\% cases."
|
|
|
|
<< std::endl;
|
|
|
|
random_number_generator<unsigned int> random_ascii_char(97, 122); // a-z
|
|
|
|
random_number_generator<unsigned int> rng;
|
|
|
|
for (unsigned int i = 0; i < 10; i++) {
|
|
|
|
test_case t;
|
|
|
|
t.i.s += (char)random_ascii_char();
|
|
|
|
t.r = find(t.i.s, t.i.k);
|
|
|
|
std::cout << t;
|
|
|
|
for (unsigned int j = 0; j < 10; j++) {
|
|
|
|
for (unsigned int k = 0, increase_str_len_by = rng(100, 1000);
|
|
|
|
k < increase_str_len_by; k++) {
|
|
|
|
t.i.s += (char)random_ascii_char();
|
|
|
|
result r = find(t.i.s, t.i.k);
|
|
|
|
if (r.match_found && t.r.match_found)
|
|
|
|
assert(r.longest_end - r.longest_start >=
|
|
|
|
t.r.longest_end - t.r.longest_start);
|
|
|
|
t.r = r;
|
|
|
|
std::cout << t;
|
|
|
|
assert(K_verify(t));
|
|
|
|
if (!rng(0, 99))
|
|
|
|
assert(thorough_test(t));
|
|
|
|
}
|
|
|
|
while (t.r.match_found) {
|
|
|
|
result r = find(t.i.s, ++t.i.k);
|
|
|
|
if (r.match_found)
|
|
|
|
assert(r.longest_end - r.longest_start >=
|
|
|
|
t.r.longest_end - t.r.longest_start);
|
|
|
|
t.r = r;
|
|
|
|
std::cout << t;
|
|
|
|
assert(K_verify(t));
|
|
|
|
if (!rng(0, 99))
|
|
|
|
assert(thorough_test(t));
|
2023-02-23 09:28:02 +01:00
|
|
|
}
|
2023-02-26 00:11:06 +01:00
|
|
|
t.i.k = rng(1, --t.i.k);
|
2023-02-23 09:28:02 +01:00
|
|
|
}
|
|
|
|
}
|
2023-02-22 15:03:22 +01:00
|
|
|
return 0;
|
|
|
|
}
|