Resources

Reference Material

Competition-ready code templates, key formulas with derivations, and common implementation pitfalls.

Code Templates

Recursive FFT

Divide-and-conquer FFT using std::complex<double>. Clean and readable — good for learning.

C++
#include <bits/stdc++.h>
using namespace std;
using cd = complex<double>;
const double PI = acos(-1);

// Recursive Cooley-Tukey FFT
// inv=false: forward DFT, inv=true: inverse DFT
void fft(vector<cd>& a, bool inv) {
    int n = a.size();
    if (n == 1) return;

    vector<cd> a0(n / 2), a1(n / 2);
    for (int i = 0; i < n / 2; i++) {
        a0[i] = a[2 * i];
        a1[i] = a[2 * i + 1];
    }
    fft(a0, inv);
    fft(a1, inv);

    double ang = 2 * PI / n * (inv ? -1 : 1);
    cd w(1), wn(cos(ang), sin(ang));
    for (int i = 0; i < n / 2; i++, w *= wn) {
        a[i]         = a0[i] + w * a1[i];
        a[i + n / 2] = a0[i] - w * a1[i];
    }
    if (inv)
        for (cd& x : a) x /= 2;
}

// Multiply polynomials a and b, return coefficients
vector<long long> multiply(vector<int>& a, vector<int>& b) {
    vector<cd> fa(a.begin(), a.end()), fb(b.begin(), b.end());
    int n = 1;
    while (n < (int)(a.size() + b.size())) n <<= 1;
    fa.resize(n); fb.resize(n);
    fft(fa, false); fft(fb, false);
    for (int i = 0; i < n; i++) fa[i] *= fb[i];
    fft(fa, true);
    vector<long long> res(n);
    for (int i = 0; i < n; i++)
        res[i] = llround(fa[i].real());
    return res;
}

Iterative FFT

In-place iterative FFT with bit-reversal permutation. Preferred for competitive programming.

C++
#include <bits/stdc++.h>
using namespace std;
using cd = complex<double>;
const double PI = acos(-1);

// Iterative Cooley-Tukey FFT with bit-reversal
void fft(vector<cd>& a, bool inv) {
    int n = a.size();
    // Bit-reversal permutation
    for (int i = 1, j = 0; i < n; i++) {
        int bit = n >> 1;
        for (; j & bit; bit >>= 1) j ^= bit;
        j ^= bit;
        if (i < j) swap(a[i], a[j]);
    }
    // Butterfly passes
    for (int len = 2; len <= n; len <<= 1) {
        double ang = 2 * PI / len * (inv ? -1 : 1);
        cd wlen(cos(ang), sin(ang));
        for (int i = 0; i < n; i += len) {
            cd w(1);
            for (int j = 0; j < len / 2; j++, w *= wlen) {
                cd u = a[i + j], v = w * a[i + j + len / 2];
                a[i + j]           = u + v;
                a[i + j + len / 2] = u - v;
            }
        }
    }
    if (inv)
        for (cd& x : a) x /= n;
}

NTT mod 998244353

Number Theoretic Transform for exact integer results modulo 998244353.

C++
#include <bits/stdc++.h>
using namespace std;

const int MOD = 998244353;  // = 119 * 2^23 + 1, primitive root g = 3
const int g = 3;

long long power(long long a, long long b, long long mod) {
    long long res = 1;
    a %= mod;
    while (b > 0) {
        if (b & 1) res = res * a % mod;
        a = a * a % mod;
        b >>= 1;
    }
    return res;
}

// Number Theoretic Transform (mod 998244353)
void ntt(vector<int>& a, bool inv) {
    int n = a.size();
    for (int i = 1, j = 0; i < n; i++) {
        int bit = n >> 1;
        for (; j & bit; bit >>= 1) j ^= bit;
        j ^= bit;
        if (i < j) swap(a[i], a[j]);
    }
    for (int len = 2; len <= n; len <<= 1) {
        long long w = inv
            ? power(g, MOD - 1 - (MOD - 1) / len, MOD)
            : power(g, (MOD - 1) / len, MOD);
        for (int i = 0; i < n; i += len) {
            long long wk = 1;
            for (int j = 0; j < len / 2; j++, wk = wk * w % MOD) {
                int u = a[i + j];
                int v = (long long)wk * a[i + j + len / 2] % MOD;
                a[i + j]           = (u + v) % MOD;
                a[i + j + len / 2] = (u - v + MOD) % MOD;
            }
        }
    }
    if (inv) {
        long long n_inv = power(n, MOD - 2, MOD);
        for (int& x : a) x = (long long)x * n_inv % MOD;
    }
}

vector<int> multiply(vector<int> a, vector<int> b) {
    int result_size = a.size() + b.size() - 1;
    int n = 1;
    while (n < result_size) n <<= 1;
    a.resize(n); b.resize(n);
    ntt(a, false); ntt(b, false);
    for (int i = 0; i < n; i++)
        a[i] = (long long)a[i] * b[i] % MOD;
    ntt(a, true);
    a.resize(result_size);
    return a;
}

Python FFT

Recursive FFT in Python using cmath. Compatible with the built-in editor.

Python
import cmath
import math

def fft(a, inv=False):
    """Recursive Cooley-Tukey FFT."""
    n = len(a)
    if n == 1:
        return list(a)
    even = fft(a[0::2], inv)
    odd  = fft(a[1::2], inv)
    sign = 1 if inv else -1
    T = [cmath.exp(sign * 2j * math.pi * k / n) * odd[k]
         for k in range(n // 2)]
    return (
        [even[k] + T[k] for k in range(n // 2)] +
        [even[k] - T[k] for k in range(n // 2)]
    )

def multiply_polynomials(a, b):
    """Multiply polynomials a and b (coefficient lists)."""
    result_len = len(a) + len(b) - 1
    n = 1
    while n < result_len:
        n <<= 1
    # Zero-pad
    fa = list(a) + [0] * (n - len(a))
    fb = list(b) + [0] * (n - len(b))
    # Forward FFT
    FA = fft(fa)
    FB = fft(fb)
    # Pointwise multiply
    FC = [FA[i] * FB[i] for i in range(n)]
    # Inverse FFT
    fc = fft(FC, inv=True)
    # Scale and round
    return [round(x.real / n) for x in fc[:result_len]]

# Example: (1 + 2x + 3x^2) * (4 + 5x) = 4 + 13x + 22x^2 + 15x^3
print(multiply_polynomials([1, 2, 3], [4, 5]))

Key Formulas

Σ

Discrete Fourier Transform

Xk=j=0N1xje2πikj/NX_k = \sum_{j=0}^{N-1} x_j \cdot e^{-2\pi i k j / N}

Evaluates the input polynomial at the N-th roots of unity. Each output X_k is the polynomial evaluated at ω_N^k.

Σ

Inverse DFT

xj=1Nk=0N1Xke2πikj/Nx_j = \frac{1}{N} \sum_{k=0}^{N-1} X_k \cdot e^{2\pi i k j / N}

Reconstructs the original sequence from its DFT. Note the 1/N scaling factor required after the inverse transform.

Σ

Butterfly Operation

X[k]=E[k]+ωNkO[k]X[k+N/2]=E[k]ωNkO[k]X[k] = E[k] + \omega_N^k \cdot O[k] \qquad X[k+N/2] = E[k] - \omega_N^k \cdot O[k]

Core of the Cooley-Tukey algorithm. Combines the DFT of even-indexed (E) and odd-indexed (O) sub-problems.

Σ

Twiddle Factor

ωNk=e2πik/N=cos ⁣(2πkN)isin ⁣(2πkN)\omega_N^k = e^{-2\pi i k / N} = \cos\!\left(\frac{2\pi k}{N}\right) - i\sin\!\left(\frac{2\pi k}{N}\right)

The complex root of unity used in the butterfly. These are the N-th roots of unity equally spaced on the unit circle.

Σ

Convolution Theorem

(ab)=F1 ⁣(F(a)F(b))(a \ast b) = \mathcal{F}^{-1}\!\left(\mathcal{F}(a) \cdot \mathcal{F}(b)\right)

Convolution in the time domain equals pointwise multiplication in the frequency domain. This is why FFT enables O(n log n) polynomial multiplication.

Common Mistakes

Array not padded to a power of 2

FFT requires arrays of length 2^k. Multiplying polynomials of degree n and m produces degree n+m, requiring 2^k ≥ n+m+1 elements.

Fix: Pad to the next power of 2 ≥ (len_a + len_b - 1) before calling FFT.

Using floating-point arithmetic for NTT

NTT operates over modular integers, not complex doubles. Using std::complex<double> with NTT will give wrong answers.

Fix: Use integer arrays with modular arithmetic. Replace omega with modular power: g^((MOD-1)/n) mod MOD.

Off-by-one in result array size

Multiplying a degree-n polynomial by a degree-m polynomial gives a degree-(n+m) polynomial, which has n+m+1 coefficients.

Fix: Allocate result array of size a.size() + b.size() - 1, then resize after IFFT.

Forgetting to divide by N after inverse FFT

The inverse DFT formula includes a 1/N scaling factor. Many iterative implementations omit this step.

Fix: After IFFT, divide every element by n: for (auto& x : a) x /= n;

Not reducing mod p in each NTT butterfly step

In NTT, intermediate products must be reduced mod p at each step to prevent 64-bit integer overflow.

Fix: Apply % MOD after every multiplication: v = (long long)wk * a[i+j+len/2] % MOD;