Given a string s and a positive integer K, the task is to find the length of the longest contiguous substring that appears at least K times in s.
Examples:
Input: s = "abababcdabcd", K = 2
Output: 4
Explanation: "abcd" is the longest repeating substring at least K times.Input: s = "aacd", K = 4
Output: 0
Approach: This can be solved with the following idea:
We will use the Rabin-Karp algorithm to solve this problem, which is a rolling hash-based algorithm to find the longest common substring that repeats at least k times.
Below are the steps:
- Initialize the parameters: base, modulus, and power.
- Define a function for computing the rolling hash.
- Define a function for binary search to find the largest substring length.
- For each possible substring length, use the Rabin-Karp algorithm to check if it repeats at least K times.
- Return the longest substring length that meets the requirement.
Below is the implementation of the above code:
// C++ code for the above approach:
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
using namespace std;
// Define constants for the base and
// modulus of the hash function
const int BASE = 53;
const int MODULUS = 1e9 + 7;
// Define a maximum length for the
// input string
const int MAX_LENGTH = 100000;
// Function to compute the rolling hash
// for all substrings of length len in
// the input string s
long long rolling_hash(const string& s, int len,
vector<long long>& power,
vector<long long>& hashes)
{
// Initialize the hash value for the
// first substring
long long hash = 0;
for (int i = 0; i < len; ++i) {
hash = (hash * BASE + s[i]) % MODULUS;
}
// Store the hash value for the first
// substring in the hashes vector
hashes[1] = hash;
// Compute the hash value for all
// subsequent substrings using the
// rolling hash technique
for (int i = len; i < s.size(); ++i) {
hash = ((hash - power[len - 1] * s[i - len])
% MODULUS
+ MODULUS)
% MODULUS;
hash = (hash * BASE + s[i]) % MODULUS;
// Store the hash value in the
// hashes vector
hashes[i - len + 2] = hash;
}
// Return the hash value for the last
// substring
return hash;
}
// Binary search to find the length of
// the longest substring that repeats
// at least k times
int binary_search(const string& s, int k)
{
// Initialize the range of possible
// substring lengths to check
int low = 0, high = s.size();
int ans = 0;
// Initialize vectors to store the
// powers of the base and the hash
// values for all substrings
vector<long long> power(MAX_LENGTH), hashes(MAX_LENGTH);
power[0] = 1;
for (int i = 1; i < MAX_LENGTH; ++i) {
power[i] = (power[i - 1] * BASE) % MODULUS;
}
// Perform binary search on the range
// of possible substring lengths
while (low <= high) {
// Compute the midpoint of
// the range
int mid = (low + high) / 2;
// Compute the rolling hash for
// all substrings of length mid
rolling_hash(s, mid, power, hashes);
// Count the frequency of
// each hash value
unordered_map<long long, int> count;
for (int i = 1; i <= s.size() - mid + 1; ++i) {
count[hashes[i]]++;
}
// Check if any hash value
// appears at least k times
bool found = false;
for (const auto& p : count) {
if (p.second >= k) {
found = true;
break;
}
}
// If a repeating substring of
// length mid appears at least k
// times, update the answer and
// search the upper half
// of the range
if (found) {
ans = mid;
low = mid + 1;
}
else {
// Otherwise, search the
// lower half of the range
high = mid - 1;
}
}
// Return the length of the
// longest substring
return ans;
}
// Driver code
int main()
{
string s = "abababcdabcd";
int k = 2;
// Function call
int ans = binary_search(s, k);
cout << ans << endl;
return 0;
}
import java.util.HashMap;
import java.util.Map;
public class GFG {
public static void main(String[] args) {
String s = "abababcdabcd";
int k = 2;
// Function call
int ans = binarySearch(s, k);
System.out.println(ans);
}
// Binary search to find the length of
// the longest substring that repeats
// at least k times
public static int binarySearch(String s, int k) {
// Initialize the range of possible
// substring lengths to check
int low = 0;
int high = s.length();
int ans = 0;
// Perform binary search on the range
// of possible substring lengths
while (low <= high) {
// Compute the midpoint of
// the range
int mid = (low + high) / 2;
// Compute the rolling hash for
// all substrings of length mid
int[] hashes = rollingHash(s, mid);
// Count the frequency of
// each hash value
Map<Integer, Integer> count = new HashMap<>();
for (int hash : hashes) {
count.put(hash, count.getOrDefault(hash, 0) + 1);
}
// Check if any hash value
// appears at least k times
boolean found = false;
for (int value : count.values()) {
if (value >= k) {
found = true;
break;
}
}
// If a repeating substring of
// length mid appears at least k
// times, update the answer and
// search the upper half
// of the range
if (found) {
ans = mid;
low = mid + 1;
}
// Otherwise, search the
// lower half of the range
else {
high = mid - 1;
}
}
// Return the length of the
// longest substring
return ans;
}
public static int[] rollingHash(String s, int length) {
// Define constants for the base and
// modulus of the hash function
int base = 53;
int modulus = (int) (Math.pow(10, 9) + 7);
int power = 1;
int hashValue = 0;
int[] hashes = new int[s.length() - length + 1];
// Initialize the hash value for the
// first substring
for (int i = 0; i < length; i++) {
hashValue = (hashValue * base + (int) s.charAt(i)) % modulus;
power = (power * base) % modulus;
}
hashes[0] = hashValue;
// Compute the hash value for all
// subsequent substrings using the
// rolling hash technique
for (int i = length; i < s.length(); i++) {
hashValue = (hashValue * base -
(int) s.charAt(i - length) * power + (int) s.charAt(i)) % modulus;
// Store the hash value in the
// hashes vector
hashes[i - length + 1] = hashValue;
}
// Return the hash value for the last
// substring
return hashes;
}
}
# Python code for the above approach
import collections
# Function to compute the rolling hash
# for all substrings of length len in
# the input string s
def rolling_hash(s, length):
base = 53
modulus = 10**9 + 7
power = 1
hash_value = 0
hashes = []
for i in range(length):
hash_value = (hash_value * base + ord(s[i])) % modulus
power = (power * base) % modulus
hashes.append(hash_value)
for i in range(length, len(s)):
hash_value = (hash_value * base - ord(s[i - length]) * power + ord(s[i])) % modulus
hashes.append(hash_value)
return hashes
# Binary search to find the length of
# the longest substring that repeats
# at least k times
def binary_search(s, k):
low = 0
high = len(s)
ans = 0
while low <= high:
mid = (low + high) // 2
hashes = rolling_hash(s, mid)
count = collections.Counter(hashes)
found = False
for value in count.values():
if value >= k:
found = True
break
if found:
ans = mid
low = mid + 1
else:
high = mid - 1
return ans
# Driver code
if __name__ == "__main__":
s = "abababcdabcd"
k = 2
ans = binary_search(s, k)
print(ans)
using System;
using System.Collections.Generic;
public class GFG
{
public static void Main(string[] args)
{
string s = "abababcdabcd";
int k = 2;
// Function call
int ans = BinarySearch(s, k);
Console.WriteLine(ans);
}
// Binary search to find the length of
// the longest substring that repeats
// at least k times
public static int BinarySearch(string s, int k)
{
// Initialize the range of possible
// substring lengths to check
int low = 0;
int high = s.Length;
int ans = 0;
// Perform binary search on the range
// of possible substring lengths
while (low <= high)
{
// Compute the midpoint of
// the range
int mid = (low + high) / 2;
// Compute the rolling hash for
// all substrings of length mid
int[] hashes = RollingHash(s, mid);
// Count the frequency of
// each hash value
Dictionary<int, int> count = new Dictionary<int, int>();
foreach (int hash in hashes)
{
if (count.ContainsKey(hash))
count[hash]++;
else
count[hash] = 1;
}
// Check if any hash value
// appears at least k times
bool found = false;
foreach (int value in count.Values)
{
if (value >= k)
{
found = true;
break;
}
}
// If a repeating substring of
// length mid appears at least k
// times, update the answer and
// search the upper half
// of the range
if (found)
{
ans = mid;
low = mid + 1;
}
// Otherwise, search the
// lower half of the range
else
{
high = mid - 1;
}
}
// Return the length of the
// longest substring
return ans;
}
public static int[] RollingHash(string s, int length)
{
// Define constants for the base and
// modulus of the hash function
int @base = 53;
int modulus = (int)(Math.Pow(10, 9) + 7);
int power = 1;
int hashValue = 0;
int[] hashes = new int[s.Length - length + 1];
// Initialize the hash value for the
// first substring
for (int i = 0; i < length; i++)
{
hashValue = (int)(((long)hashValue * @base + (int)s[i]) % modulus);
power = (int)(((long)power * @base) % modulus);
}
hashes[0] = hashValue;
// Compute the hash value for all
// subsequent substrings using the
// rolling hash technique
for (int i = length; i < s.Length; i++)
{
hashValue = (int)((((long)hashValue * @base - (int)s[i - length] * power + (int)s[i]) % modulus + modulus) % modulus);
// Store the hash value in the
// hashes vector
hashes[i - length + 1] = hashValue;
}
// Return the hash value for the last
// substring
return hashes;
}
}
function rolling_hash(s, length) {
// Define constants for the base and
// modulus of the hash function
const base = 53;
const modulus = 10**9 + 7;
let power = 1;
// Initialize the hash value for the
// first substring
let hash_value = 0;
let hashes = [];
for (let i = 0; i < length; i++) {
hash_value = (hash_value * base + s.charCodeAt(i)) % modulus;
power = (power * base) % modulus;
}
// Store the hash value for the first
// substring in the hashes vector
hashes.push(hash_value);
// Compute the hash value for all
// subsequent substrings using the
// rolling hash technique
for (let i = length; i < s.length; i++) {
hash_value = (hash_value * base - s.charCodeAt(i - length) * power + s.charCodeAt(i)) % modulus;
// Store the hash value in the
// hashes vector
hashes.push(hash_value);
}
// Return the hash value for the last
// substring
return hashes;
}
// Binary search to find the length of
// the longest substring that repeats
// at least k times
function binary_search(s, k) {
// Initialize the range of possible
// substring lengths to check
let low = 0;
let high = s.length;
let ans = 0;
// Perform binary search on the range
// of possible substring lengths
while (low <= high) {
// Compute the midpoint of
// the range
let mid = Math.floor((low + high) / 2);
// Compute the rolling hash for
// all substrings of length mid
let hashes = rolling_hash(s, mid);
// Count the frequency of
// each hash value
let count = new Map();
for (let i = 0; i < hashes.length; i++) {
if (count.has(hashes[i])) {
count.set(hashes[i], count.get(hashes[i]) + 1);
} else {
count.set(hashes[i], 1);
}
}
// Check if any hash value
// appears at least k times
let found = false;
for (let value of count.values()) {
if (value >= k) {
found = true;
break;
}
}
// If a repeating substring of
// length mid appears at least k
// times, update the answer and
// search the upper half
// of the range
if (found) {
ans = mid;
low = mid + 1;
}
// Otherwise, search the
// lower half of the range
else {
high = mid - 1;
}
}
// Return the length of the
// longest substring
return ans;
}
// Function call
let s = "abababcdabcd";
let k = 2;
let ans = binary_search(s, k);
console.log(ans);
Output
4
Time Complexity: O(N*log(N))
Auxiliary Space: O(N)