Find length of repeating Substring

Last Updated : 14 Aug, 2023

Given a string s and a positive integer K, the task is to find the length of the longest contiguous substring that appears at least K times in s.

Examples:

Input: s = "abababcdabcd", K = 2
Output: 4
Explanation: "abcd" is the longest repeating substring at least K times.

Input: s = "aacd", K = 4
Output: 0

Approach: This can be solved with the following idea:

We will use the Rabin-Karp algorithm to solve this problem, which is a rolling hash-based algorithm to find the longest common substring that repeats at least k times.

Below are the steps: 

  • Initialize the parameters: base, modulus, and power.
  • Define a function for computing the rolling hash.
  • Define a function for binary search to find the largest substring length.
  • For each possible substring length, use the Rabin-Karp algorithm to check if it repeats at least K times.
  • Return the longest substring length that meets the requirement.

Below is the implementation of the above code:

C++
// C++ code for the above approach:
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

// Define constants for the base and
// modulus of the hash function
const int BASE = 53;
const int MODULUS = 1e9 + 7;

// Define a maximum length for the
// input string
const int MAX_LENGTH = 100000;

// Function to compute the rolling hash
// for all substrings of length len in
// the input string s
long long rolling_hash(const string& s, int len,
                       vector<long long>& power,
                       vector<long long>& hashes)
{

    // Initialize the hash value for the
    // first substring
    long long hash = 0;
    for (int i = 0; i < len; ++i) {
        hash = (hash * BASE + s[i]) % MODULUS;
    }

    // Store the hash value for the first
    // substring in the hashes vector
    hashes[1] = hash;

    // Compute the hash value for all
    // subsequent substrings using the
    // rolling hash technique
    for (int i = len; i < s.size(); ++i) {

        hash = ((hash - power[len - 1] * s[i - len])
                    % MODULUS
                + MODULUS)
               % MODULUS;
        hash = (hash * BASE + s[i]) % MODULUS;

        // Store the hash value in the
        // hashes vector
        hashes[i - len + 2] = hash;
    }

    // Return the hash value for the last
    // substring
    return hash;
}

// Binary search to find the length of
// the longest substring that repeats
// at least k times
int binary_search(const string& s, int k)
{
    // Initialize the range of possible
    // substring lengths to check
    int low = 0, high = s.size();
    int ans = 0;

    // Initialize vectors to store the
    // powers of the base and the hash
    // values for all substrings
    vector<long long> power(MAX_LENGTH), hashes(MAX_LENGTH);
    power[0] = 1;
    for (int i = 1; i < MAX_LENGTH; ++i) {
        power[i] = (power[i - 1] * BASE) % MODULUS;
    }

    // Perform binary search on the range
    // of possible substring lengths
    while (low <= high) {

        // Compute the midpoint of
        // the range
        int mid = (low + high) / 2;

        // Compute the rolling hash for
        // all substrings of length mid
        rolling_hash(s, mid, power, hashes);

        // Count the frequency of
        // each hash value
        unordered_map<long long, int> count;
        for (int i = 1; i <= s.size() - mid + 1; ++i) {
            count[hashes[i]]++;
        }

        // Check if any hash value
        // appears at least k times
        bool found = false;
        for (const auto& p : count) {
            if (p.second >= k) {
                found = true;
                break;
            }
        }

        // If a repeating substring of
        // length mid appears at least k
        // times, update the answer and
        // search the upper half
        // of the range
        if (found) {
            ans = mid;
            low = mid + 1;
        }
        else {

            // Otherwise, search the
            // lower half of the range
            high = mid - 1;
        }
    }

    // Return the length of the
    // longest substring
    return ans;
}

// Driver code
int main()
{

    string s = "abababcdabcd";
    int k = 2;

    // Function call
    int ans = binary_search(s, k);

    cout << ans << endl;

    return 0;
}
Java
import java.util.HashMap;
import java.util.Map;

public class GFG {
    public static void main(String[] args) {
        String s = "abababcdabcd";
        int k = 2;
      
       // Function call
        int ans = binarySearch(s, k);
        System.out.println(ans);
    }

      // Binary search to find the length of
    // the longest substring that repeats
    // at least k times
    public static int binarySearch(String s, int k) {
  
          // Initialize the range of possible
        // substring lengths to check
        int low = 0;
        int high = s.length();
        int ans = 0;
      
          // Perform binary search on the range
        // of possible substring lengths
        while (low <= high) {
          
              // Compute the midpoint of
            // the range
            int mid = (low + high) / 2;
          
              // Compute the rolling hash for
            // all substrings of length mid
            int[] hashes = rollingHash(s, mid);
          
              // Count the frequency of
            // each hash value
            Map<Integer, Integer> count = new HashMap<>();
            for (int hash : hashes) {
                count.put(hash, count.getOrDefault(hash, 0) + 1);
            }
          
              // Check if any hash value
            // appears at least k times
            boolean found = false;
            for (int value : count.values()) {
                if (value >= k) {
                    found = true;
                    break;
                }
            }
          
         // If a repeating substring of
            // length mid appears at least k
            // times, update the answer and
         // search the upper half
            // of the range
            if (found) {
                ans = mid;
                low = mid + 1;
            } 
          // Otherwise, search the
          // lower half of the range
          else {
                high = mid - 1;
            }
        }
      
          // Return the length of the
        // longest substring
        return ans;
    }

    public static int[] rollingHash(String s, int length) {
      
          // Define constants for the base and
        // modulus of the hash function
        int base = 53;
        int modulus = (int) (Math.pow(10, 9) + 7);
        
      
          int power = 1;
        int hashValue = 0;
        int[] hashes = new int[s.length() - length + 1];
      
          // Initialize the hash value for the
        // first substring
        for (int i = 0; i < length; i++) {
            hashValue = (hashValue * base + (int) s.charAt(i)) % modulus;
            power = (power * base) % modulus;
        }
        hashes[0] = hashValue;
      
          // Compute the hash value for all
        // subsequent substrings using the
        // rolling hash technique
        for (int i = length; i < s.length(); i++) {
            hashValue = (hashValue * base - 
            (int) s.charAt(i - length) * power + (int) s.charAt(i)) % modulus;
          
            // Store the hash value in the
            // hashes vector
              hashes[i - length + 1] = hashValue;
        }
      
          // Return the hash value for the last
        // substring
        return hashes;
    }
}
Python3
# Python code for the above approach

import collections

# Function to compute the rolling hash
# for all substrings of length len in
# the input string s
def rolling_hash(s, length):
    base = 53
    modulus = 10**9 + 7
    power = 1
    hash_value = 0
    hashes = []

    for i in range(length):
        hash_value = (hash_value * base + ord(s[i])) % modulus
        power = (power * base) % modulus

    hashes.append(hash_value)

    for i in range(length, len(s)):
        hash_value = (hash_value * base - ord(s[i - length]) * power + ord(s[i])) % modulus
        hashes.append(hash_value)

    return hashes

# Binary search to find the length of
# the longest substring that repeats
# at least k times
def binary_search(s, k):
    low = 0
    high = len(s)
    ans = 0

    while low <= high:
        mid = (low + high) // 2

        hashes = rolling_hash(s, mid)

        count = collections.Counter(hashes)

        found = False
        for value in count.values():
            if value >= k:
                found = True
                break

        if found:
            ans = mid
            low = mid + 1
        else:
            high = mid - 1

    return ans

# Driver code
if __name__ == "__main__":
    s = "abababcdabcd"
    k = 2

    ans = binary_search(s, k)

    print(ans)
C#
using System;
using System.Collections.Generic;

public class GFG
{
    public static void Main(string[] args)
    {
        string s = "abababcdabcd";
        int k = 2;

        // Function call
        int ans = BinarySearch(s, k);
        Console.WriteLine(ans);
    }

    // Binary search to find the length of
    // the longest substring that repeats
    // at least k times
    public static int BinarySearch(string s, int k)
    {
        // Initialize the range of possible
        // substring lengths to check
        int low = 0;
        int high = s.Length;
        int ans = 0;

        // Perform binary search on the range
        // of possible substring lengths
        while (low <= high)
        {
            // Compute the midpoint of
            // the range
            int mid = (low + high) / 2;

            // Compute the rolling hash for
            // all substrings of length mid
            int[] hashes = RollingHash(s, mid);

            // Count the frequency of
            // each hash value
            Dictionary<int, int> count = new Dictionary<int, int>();
            foreach (int hash in hashes)
            {
                if (count.ContainsKey(hash))
                    count[hash]++;
                else
                    count[hash] = 1;
            }

            // Check if any hash value
            // appears at least k times
            bool found = false;
            foreach (int value in count.Values)
            {
                if (value >= k)
                {
                    found = true;
                    break;
                }
            }

            // If a repeating substring of
            // length mid appears at least k
            // times, update the answer and
            // search the upper half
            // of the range
            if (found)
            {
                ans = mid;
                low = mid + 1;
            }
            // Otherwise, search the
            // lower half of the range
            else
            {
                high = mid - 1;
            }
        }

        // Return the length of the
        // longest substring
        return ans;
    }

    public static int[] RollingHash(string s, int length)
    {
        // Define constants for the base and
        // modulus of the hash function
        int @base = 53;
        int modulus = (int)(Math.Pow(10, 9) + 7);

        int power = 1;
        int hashValue = 0;
        int[] hashes = new int[s.Length - length + 1];

        // Initialize the hash value for the
        // first substring
        for (int i = 0; i < length; i++)
        {
            hashValue = (int)(((long)hashValue * @base + (int)s[i]) % modulus);
            power = (int)(((long)power * @base) % modulus);
        }
        hashes[0] = hashValue;

        // Compute the hash value for all
        // subsequent substrings using the
        // rolling hash technique
        for (int i = length; i < s.Length; i++)
        {
            hashValue = (int)((((long)hashValue * @base - (int)s[i - length] * power + (int)s[i]) % modulus + modulus) % modulus);

            // Store the hash value in the
            // hashes vector
            hashes[i - length + 1] = hashValue;
        }

        // Return the hash value for the last
        // substring
        return hashes;
    }
}
JavaScript
function rolling_hash(s, length) {
    
    // Define constants for the base and
    // modulus of the hash function
    const base = 53;
    const modulus = 10**9 + 7;
    let power = 1;
    
    // Initialize the hash value for the
    // first substring
    let hash_value = 0;
    let hashes = [];
    for (let i = 0; i < length; i++) {
        hash_value = (hash_value * base + s.charCodeAt(i)) % modulus;
        power = (power * base) % modulus;
    }
    
    // Store the hash value for the first
    // substring in the hashes vector
    hashes.push(hash_value);
    
    // Compute the hash value for all
    // subsequent substrings using the
    // rolling hash technique
    for (let i = length; i < s.length; i++) {
        hash_value = (hash_value * base - s.charCodeAt(i - length) * power + s.charCodeAt(i)) % modulus;
        
        // Store the hash value in the
        // hashes vector
        hashes.push(hash_value);
    }
    
    // Return the hash value for the last
    // substring
    return hashes;
}

// Binary search to find the length of
// the longest substring that repeats
// at least k times
function binary_search(s, k) {
    
    // Initialize the range of possible
    // substring lengths to check
    let low = 0;
    let high = s.length;
    let ans = 0;
    
    // Perform binary search on the range
    // of possible substring lengths
    while (low <= high) {
        
        // Compute the midpoint of
        // the range
        let mid = Math.floor((low + high) / 2);
        
        // Compute the rolling hash for
        // all substrings of length mid
        let hashes = rolling_hash(s, mid);
        
        // Count the frequency of
        // each hash value
        let count = new Map();
        for (let i = 0; i < hashes.length; i++) {
            if (count.has(hashes[i])) {
                count.set(hashes[i], count.get(hashes[i]) + 1);
            } else {
                count.set(hashes[i], 1);
            }
        }
        
        // Check if any hash value
        // appears at least k times
        let found = false;
        for (let value of count.values()) {
            if (value >= k) {
                found = true;
                break;
            }
        }
        
        // If a repeating substring of
        // length mid appears at least k
        // times, update the answer and
        // search the upper half
        // of the range
        if (found) {
            ans = mid;
            low = mid + 1;
        }
        // Otherwise, search the
        // lower half of the range
        else {
            high = mid - 1;
        }
    }
    
    // Return the length of the
    // longest substring
    return ans;
}


// Function call
let s = "abababcdabcd";
let k = 2;
let ans = binary_search(s, k);
console.log(ans);

Output
4

Time Complexity: O(N*log(N))
Auxiliary Space: O(N)

Comment