智乃与模数

思考过程：首先先看数据范围，n和m均属于1e9的范围，我们一个个求出1的n取模的结果然后再暴力求解这样肯定是会超时的，不可行。于是我们先从余数的计算公式和定义出发，在此题中余数=n-[n/i]下取整*i;
我们n/i是可以分块的，而且我们可以算出分块的左右边界和块的大小，比如在样例中 n分别与[6,7,8,9,10]相除求得的结果都是1，我们发现[6,7,8,9,10]->(取余结果是)-[4,3,2,1,0]!奇怪的是我们发现是余数大小是递减的，我们多观察几组发现这个块内的余数不仅具有递减的特性，还具有等差的特性，有了这个特点之后，我们可以快速求出相同块的区间和。但是知道这个我们如何求出最大的k个余数的和呢，我们似乎不知道答案，正面求答案貌似很难，我们想想能不能看看枚举最大的余数，看看大于等于这个余数的数量是不是大于等于k个，我们找到最大的余数x，那我们还是不知道总和呀，我们在处理的时候让求出维护一下每个块中满足条件的总和。所有我们的目的很明确:1.在二分答案的时候我们维护一个cnt表示大于等于最大余数x的数量，我们需要这个数量大于等于k.2.我们需要维护一个tot维护块中满足余数大于等于x的余数总和，最后我们要满足条件1的tot才是有意义的。
AC代码（附带注释）：

#include <bits/stdc++.h>
using namespace std;
// CJX__//
typedef long long ll; // 不开long long 见祖宗
typedef unsigned long long ull;
typedef __int128 i128;
typedef pair<int, int> PII;
typedef pair<ll, ll> PLL;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<double> vd;
typedef vector<PII> vPII;
#define IOS                  \
    ios::sync_with_stdio(0); \
    cin.tie(0);              \
    cout.tie(0);
#define debug(...) cout << "[debug] " #__VA_ARGS__ " = " << (__VA_ARGS__) << endl;
#define out(x) cout << ((x) ? "YES" : "NO") << endl
#define mod(x, P) (((x) % (P) + (P)) % (P))
#define endl '\n'
#define gcd __gcd
#define lc p << 1
#define rc p << 1 | 1
#define INF 0x3f3f3f3f3f3f3f3f
#define inf 0x3f3f3f3f
#define fi first
#define se second
#define all(x) x.begin(), x.end()
#define lowbit(x) ((x) & (-x))
#define rep(i, x, n) for (ll i = x; i <= n; i++)
#define dep(i, x, n) for (ll i = x; i >= n; i--)
#define mem(a, x) memset(a, x, sizeof a)
const double eps = 1e-5;
const int N = 1e5 + 10, M = 2 * N, K = 26;
const ll MOD = 1e9 + 7, Md3 = 998244353, Md7 = 1e9 + 7, Md9 = 1e9 + 9;
const ll base1 = 131, base2 = 13331;
const int dx[8] = {-1, 0, 1, 0, -1, -1, 1, 1}, dy[8] = {0, 1, 0, -1, -1, 1, -1, 1};
const int ddx[8] = {1, 1, 2, 2, -1, -1, -2, -2}, ddy[8] = {2, -2, 1, -1, 2, -2, 1, -1};
template <typename T>
bool cmin(T &a, const T &b) { return b < a ? a = b, 1 : 0; }
template <typename T>
bool cmax(T &a, const T &b) { return b > a ? a = b, 1 : 0; }
template <typename T>
void sort_range(vector<T> &v, int l, int r) { sort(v.begin() + l, v.begin() + r + 1); }
template <typename T>
struct BIT1
{
    int n;
    vector<T> tr;
    BIT1(int n) : n(n), tr(n + 1) {}
    void add(int x, T v)
    {
        for (; x <= n; x += x & -x)
            tr[x] += v;
    }
    T sum(int x)
    {
        T r = 0;
        for (; x; x -= x & -x)
            r += tr[x];
        return r;
    }
    T range(int l, int r) { return sum(r) - sum(l - 1); }
};
template <typename T>
struct BIT2
{
    int n, m;
    vector<vector<T>> t1, t2, t3, t4;
    BIT2(int n_ = 0, int m_ = 0) { init(n_, m_); }
    void init(int n_, int m_)
    {
        n = n_;
        m = m_;
        t1.assign(n + 1, vector<T>(m + 1, T{}));
        t2.assign(n + 1, vector<T>(m + 1, T{}));
        t3.assign(n + 1, vector<T>(m + 1, T{}));
        t4.assign(n + 1, vector<T>(m + 1, T{}));
    }
    void _add(int x, int y, const T &v)
    {
        for (int i = x; i <= n; i += i & -i)
            for (int j = y; j <= m; j += j & -j)
            {
                t1[i][j] += v;
                t2[i][j] += v * x;
                t3[i][j] += v * y;
                t4[i][j] += v * x * y;
            }
    }
    void rangeAdd(int x1, int y1, int x2, int y2, const T &v)
    {
        _add(x1, y1, v);
        _add(x1, y2 + 1, -v);
        _add(x2 + 1, y1, -v);
        _add(x2 + 1, y2 + 1, v);
    }
    T prefixSum(int x, int y)
    {
        T r{};
        for (int i = x; i > 0; i -= i & -i)
            for (int j = y; j > 0; j -= j & -j)
                r += t1[i][j] * (x + 1) * (y + 1) - t2[i][j] * (y + 1) - t3[i][j] * (x + 1) + t4[i][j];
        return r;
    }
    T rangeSum(int x1, int y1, int x2, int y2)
    {
        if (x1 > x2 || y1 > y2)
            return T{};
        return prefixSum(x2, y2) - prefixSum(x1 - 1, y2) - prefixSum(x2, y1 - 1) + prefixSum(x1 - 1, y1 - 1);
    }
};
struct Random
{
    mt19937_64 rng;
    Random() : rng(chrono::steady_clock::now().time_since_epoch().count()) {}
    ull rand_ull(ull max_val = -1) { return rng() % (max_val + 1); }
    ll rand_ll(ll l, ll r) { return uniform_int_distribution<ll>(l, r)(rng); }
    int rand_int(int l, int r) { return uniform_int_distribution<int>(l, r)(rng); }
    double rand_db(double l, double r) { return uniform_real_distribution<double>(l, r)(rng); }
    bool rand_bool(double p = 0.5) { return bernoulli_distribution(p)(rng); }
    template <typename T>
    void shuffle(vector<T> &v) { std::shuffle(v.begin(), v.end(), rng); }
};
ll qmi(ll a, ll b, ll p)
{
    ll res = 1 % p;
    a %= p;
    while (b)
    {
        if (b & 1)
            res = res * a % p;
        a = a * a % p;
        b >>= 1;
    }
    return res;
}
/*

*/

void solve()
{
    ll n, k;
    cin >> n >> k;
    ll ans = 0, tot = 0, cnt = 0;
    auto check = [&](ll x) -> bool
    {
        ll L = 1, R = 1;
        tot = 0, cnt = 0;
        while (L <= n)
        {
            /*
                t是当前块的编号（n/i）
                y是块中满足余数大于x的下标
                R是满足当前块编号的最大值（右边界）
            */
            ll t = n / L, y = (n - x) / t;
            R = n / t;
            if (y >= L)
            {
                ll d = y - L + 1;
                cnt += d;
                tot += d * (n - t * L + n - y * t) / 2;
            }
            L = R + 1;
        }
        return cnt >= k;
    };
    ll l = 0, r = n;
    while (l + 1 < r)
    {
        ll mid = l + (r - l) / 2;
        if (check(mid))
            l = mid;
        else
            r = mid;
    }
    ans = tot - (cnt - k) * l;//可能cnt>k 那么重复多的就是l 也就是二分的=查到的这个值多了
    //这不难想明白，用反证法可以很容易想清楚为什么重复的是最大的余数x，因为我们这个已经最大的余数了，如果重复的不是x，那么说明纯在更大的x，那么二分就不正确；
    cout << ans << endl;
}

int main()
{
    IOS;

    int _ = 1;
    // cin>>_;//如果是多组数据
    while (_--)
    {
        solve();
    }
    return 0;
}

思考过程:
题意：小红定义一个数组是“双生数组”，当且仅当该数组大小为偶数，数组的元素种类恰好为2 种，且这两种元素的出现次数相同。让我们求双生数组的数量。
首先我们明白是就是双生数组里面只有两个不同的元素，且出现次数相同，我们找到两个元素的区间不难，但是我们如何计算区间内满足条件的数组用多少个呢，既然是两个元素，我们可以把某一个元素看成+1的贡献，另一个视为-1的贡献，我们维护一个这样数组的前缀和和维护出现不同值的次数，这样的好处是可以减少数组数据范围的大小，然后我们知道这样前缀和出现相同值的时候就是出现符合条件的，这样我们就可以动态的累加算出正确答案了。

AC代码如下:

#include <bits/stdc++.h>
using namespace std;
// CJX__//
typedef long long ll; // 不开long long 见祖宗
typedef unsigned long long ull;
typedef __int128 i128;
typedef pair<int, int> PII;
typedef pair<ll, ll> PLL;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<double> vd;
typedef vector<PII> vPII;
#define IOS                  \
    ios::sync_with_stdio(0); \
    cin.tie(0);              \
    cout.tie(0);
#define debug(...) cout << "[debug] " #__VA_ARGS__ " = " << (__VA_ARGS__) << endl;
#define out(x) cout << ((x) ? "YES" : "NO") << endl
#define mod(x, P) (((x) % (P) + (P)) % (P))
#define endl '\n'
#define gcd __gcd
#define lc p << 1
#define rc p << 1 | 1
#define INF 0x3f3f3f3f3f3f3f3f
#define inf 0x3f3f3f3f
#define fi first
#define se second
#define all(x) x.begin(), x.end()
#define lowbit(x) ((x) & (-x))
#define rep(i, x, n) for (ll i = x; i <= n; i++)
#define dep(i, x, n) for (ll i = x; i >= n; i--)
#define mem(a, x) memset(a, x, sizeof a)
const double eps = 1e-5;
const int N = 1e5 + 10, M = 2 * N, K = 26;
const ll MOD = 1e9 + 7, Md3 = 998244353, Md7 = 1e9 + 7, Md9 = 1e9 + 9;
const ll base1 = 131, base2 = 13331;
const int dx[8] = {-1, 0, 1, 0, -1, -1, 1, 1}, dy[8] = {0, 1, 0, -1, -1, 1, -1, 1};
const int ddx[8] = {1, 1, 2, 2, -1, -1, -2, -2}, ddy[8] = {2, -2, 1, -1, 2, -2, 1, -1};
template <typename T>
bool cmin(T &a, const T &b) { return b < a ? a = b, 1 : 0; }
template <typename T>
bool cmax(T &a, const T &b) { return b > a ? a = b, 1 : 0; }
template <typename T>
void sort_range(vector<T> &v, int l, int r) { sort(v.begin() + l, v.begin() + r + 1); }
template <typename T>
struct BIT1
{
    int n;
    vector<T> tr;
    BIT1(int n) : n(n), tr(n + 1) {}
    void add(int x, T v)
    {
        for (; x <= n; x += x & -x)
            tr[x] += v;
    }
    T sum(int x)
    {
        T r = 0;
        for (; x; x -= x & -x)
            r += tr[x];
        return r;
    }
    T range(int l, int r) { return sum(r) - sum(l - 1); }
};
template <typename T>
struct BIT2
{
    int n, m;
    vector<vector<T>> t1, t2, t3, t4;
    BIT2(int n_ = 0, int m_ = 0) { init(n_, m_); }
    void init(int n_, int m_)
    {
        n = n_;
        m = m_;
        t1.assign(n + 1, vector<T>(m + 1, T{}));
        t2.assign(n + 1, vector<T>(m + 1, T{}));
        t3.assign(n + 1, vector<T>(m + 1, T{}));
        t4.assign(n + 1, vector<T>(m + 1, T{}));
    }
    void _add(int x, int y, const T &v)
    {
        for (int i = x; i <= n; i += i & -i)
            for (int j = y; j <= m; j += j & -j)
            {
                t1[i][j] += v;
                t2[i][j] += v * x;
                t3[i][j] += v * y;
                t4[i][j] += v * x * y;
            }
    }
    void rangeAdd(int x1, int y1, int x2, int y2, const T &v)
    {
        _add(x1, y1, v);
        _add(x1, y2 + 1, -v);
        _add(x2 + 1, y1, -v);
        _add(x2 + 1, y2 + 1, v);
    }
    T prefixSum(int x, int y)
    {
        T r{};
        for (int i = x; i > 0; i -= i & -i)
            for (int j = y; j > 0; j -= j & -j)
                r += t1[i][j] * (x + 1) * (y + 1) - t2[i][j] * (y + 1) - t3[i][j] * (x + 1) + t4[i][j];
        return r;
    }
    T rangeSum(int x1, int y1, int x2, int y2)
    {
        if (x1 > x2 || y1 > y2)
            return T{};
        return prefixSum(x2, y2) - prefixSum(x1 - 1, y2) - prefixSum(x2, y1 - 1) + prefixSum(x1 - 1, y1 - 1);
    }
};
struct Random
{
    mt19937_64 rng;
    Random() : rng(chrono::steady_clock::now().time_since_epoch().count()) {}
    ull rand_ull(ull max_val = -1) { return rng() % (max_val + 1); }
    ll rand_ll(ll l, ll r) { return uniform_int_distribution<ll>(l, r)(rng); }
    int rand_int(int l, int r) { return uniform_int_distribution<int>(l, r)(rng); }
    double rand_db(double l, double r) { return uniform_real_distribution<double>(l, r)(rng); }
    bool rand_bool(double p = 0.5) { return bernoulli_distribution(p)(rng); }
    template <typename T>
    void shuffle(vector<T> &v) { std::shuffle(v.begin(), v.end(), rng); }
};
ll qmi(ll a, ll b, ll p)
{
    ll res = 1 % p;
    a %= p;
    while (b)
    {
        if (b & 1)
            res = res * a % p;
        a = a * a % p;
        b >>= 1;
    }
    return res;
}
/*

*/

void solve()
{
    int n;
    cin >> n;
    vi a(n + 1, 0);
    rep(i, 1, n) cin >> a[i];
    auto calc = [&](int l, int r, int x, int y) -> ll
    {
        ll res = 0;
        vll pre(r - l + 1 + 1, 0);
        map<int, int> cnt;
        cnt[0] = 1;
        rep(i, l, r)
        {
            pre[i - l + 1] = pre[i - l] + (a[i] == x ? 1 : -1);
            res += cnt[pre[i - l + 1]];
            cnt[pre[i - l + 1]]++;
        }
        return res;
    };
    int i = 2, l = 1, r = 1;
    ll ans = 0;
    while (i <= n)
    {
        if (a[i] != a[i - 1])
        {
             l = i - 1, r = i;
            while (l - 1 >= 1 && (a[l - 1] == a[i] || a[l - 1] == a[i - 1]))
                l--;
            while (r + 1 <= n && (a[r + 1] == a[i] || a[r + 1] == a[i - 1]))
                r++;
            ans += calc(l, r, a[i - 1], a[i]);
            i = r;
        }
        i++;
    }
    cout << ans << endl;
}

int main()
{
    IOS;

    int _ = 1;
     cin>>_;//如果是多组数据
    while (_--)
    {
        solve();
    }
    return 0;
}

NowCoder Contest 95323 K - 区间 GCD 与异或和

🔗 题目信息

来源: 牛客网 (NowCoder)
标签: ST表 二分查找 位运算 数论 前缀和

💡 解题思路

1. 问题转化 (等式变换)

题目要求统计有多少个区间 $[l, r]$ 满足：
$$\gcd(a_l, \dots, a_r) = a_l \oplus \dots \oplus a_r$$

利用 前缀异或数组 pre，将区间异或和转化为 $\text{pre}[r] \oplus \text{pre}[l-1]$。
根据异或性质 $A = B \oplus C \iff B = A \oplus C$，原式等价于：
$$\text{pre}[r] = \gcd(l, r) \oplus \text{pre}[l-1]$$

核心逻辑：
固定左端点 $l$，若确定区间 GCD 为 $g$，则问题转化为：在指定范围内，找有多少个下标 $k$ 满足 pre[k] == g ^ pre[l-1]。

2. 核心性质 (GCD 的阶梯性)

对于固定的 $l$，随着 $r$ 增加，$\gcd(l, r)$ 单调不增且变化次数极少（最多 $O(\log \max A)$ 次）。
我们可以利用 ST表 + 二分查找 快速跳过 GCD 相同的子段，将 $O(N^2)$ 优化为 $O(N \log N)$。

3. 算法流程

预处理：计算 pre 数组；用 map<int, vector<int>> 记录每个异或值的下标列表；构建 ST 表。
枚举与跳跃：
- 外层枚举 $l$。
- 内层二分查找当前 GCD 值维持的最右边界 $L$。
- 在区间 $[r, L]$ 内，利用 map 二分统计满足条件的下标数。
- 更新 $r = L + 1$，处理下一段。

⏳ 复杂度分析

时间: $O(N \cdot \log(\max A) \cdot \log N)$，约 $10^8$ 计算量，满足 2s 时限。
空间: $O(N \log N)$ (ST表)。

💻 代码实现

#include <bits/stdc++.h>
using namespace std;
// CJX__//
typedef long long ll;
typedef vector<int> vi;
#define IOS ios::sync_with_stdio(0); cin.tie(0); cout.tie(0);
#define endl '\n'
#define gcd __gcd
#define all(x) x.begin(), x.end()
#define rep(i, x, n) for (ll i = x; i <= n; i++)

const int N = 2e5 + 10;
int n;
int a[N];
int f[N][21], lg[N];

// ST 表预处理
void init() {
    lg[0] = -1;
    rep(i, 1, n) {
        lg[i] = lg[i >> 1] + 1;
        f[i][0] = a[i];
    }
}

void solve() {
    cin >> n;
    map<int, vi> pos;
    vi pre(n + 2, 0);
    
    // 读入并预处理
    rep(i, 1, n) {
        cin >> a[i];
        pre[i] = pre[i - 1] ^ a[i];
        pos[pre[i]].push_back(i);
    }
    
    init();
    
    // 构建 ST 表
    for (int j = 1; j <= lg[n]; j++) {
        for (int i = 1; i + (1 << j) - 1 <= n; i++) {
            f[i][j] = gcd(f[i][j - 1], f[i + (1 << (j - 1))][j - 1]);
        }
    }
    
    auto query = [&](int l, int r) -> int {
        int k = lg[r - l + 1];
        return gcd(f[l][k], f[r - (1 << k) + 1][k]);
    };
    
    ll ans = 0;
    
    rep(l, 1, n) {
        int r = l + 1;
        while (r <= n) {
            int g = query(l, r);
            
            // 二分查找当前 GCD 维持的最右边界 L
            int L = r - 1, R = n + 1;
            while (L + 1 < R) {
                int mid = (L + R) >> 1;
                if (query(l, mid) == g) L = mid;
                else R = mid;
            }
            
            // 统计答案
            int tag = g ^ pre[l - 1];
            if (pos.count(tag)) {
                vi &vec = pos[tag];
                auto it1 = lower_bound(all(vec), r);
                auto it2 = upper_bound(all(vec), L);
                if (it2 > it1) ans += (it2 - it1);
            }
            
            r = L + 1;
        }
    }
    
    cout << ans << endl;
}

int main() {
    IOS;
    int _ = 1;
    cin >> _; 
    while (_--) {
        solve();
    }
    return 0;
}

智乃的Notepad(Easy version)

思考过程：既然这个单词出现不考虑顺序，我们只需要求某段区间求出需要敲打键盘的最小次数就行，那么问题就简单了，要想求最小的键盘敲打次数，我们就想从一个公共子串出发，诶，我们发现最长的那个串貌似不会需要删除，我们把它放在最后面，那么是不是不会重复了，删除好像是一次回溯，如果这个字符一直不用删除那么它的贡献就是1，如果要删除，贡献就是2，我们想起一个字典树的数据结构，与这个很类似，假设我们对于前缀不同的字符我们像字典一样存着，如果前缀相同但是当前不同我们就需要创一个新的节点，如果依然相同那么就无需再创，我们会发现最后我们求出的答案就是字典树节点的个数*2-最长串的长度，因为假设全部都要删除，显然最后那个最长串如果放在最后的话是无需删除的，所有我们要把最长串多出的贡献减掉。
AC代码：

#include <bits/stdc++.h>
using namespace std;
// CJX__//
typedef long long ll; // 不开long long 见祖宗
typedef unsigned long long ull;
typedef __int128 i128;
typedef pair<int, int> PII;
typedef pair<ll, ll> PLL;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<double> vd;
typedef vector<PII> vPII;
#define IOS                  \
    ios::sync_with_stdio(0); \
    cin.tie(0);              \
    cout.tie(0);
#define debug(...) cout << "[debug] " #__VA_ARGS__ " = " << (__VA_ARGS__) << endl;
#define out(x) cout << ((x) ? "YES" : "NO") << endl
#define mod(x, P) (((x) % (P) + (P)) % (P))
#define endl '\n'
#define gcd __gcd
#define lc p << 1
#define rc p << 1 | 1
#define INF 0x3f3f3f3f3f3f3f3f
#define inf 0x3f3f3f3f
#define fi first
#define se second
#define all(x) x.begin(), x.end()
#define lowbit(x) ((x) & (-x))
#define rep(i, x, n) for (ll i = x; i <= n; i++)
#define dep(i, x, n) for (ll i = x; i >= n; i--)
#define mem(a, x) memset(a, x, sizeof a)
const double eps = 1e-5;
const int N = 1e6 + 10, M = 2 * N, K = 26;
const ll MOD = 1e9 + 7, Md3 = 998244353, Md7 = 1e9 + 7, Md9 = 1e9 + 9;
const ll base1 = 131, base2 = 13331;
const int dx[8] = {-1, 0, 1, 0, -1, -1, 1, 1}, dy[8] = {0, 1, 0, -1, -1, 1, -1, 1};
const int ddx[8] = {1, 1, 2, 2, -1, -1, -2, -2}, ddy[8] = {2, -2, 1, -1, 2, -2, 1, -1};
template <typename T>
bool cmin(T &a, const T &b) { return b < a ? a = b, 1 : 0; }
template <typename T>
bool cmax(T &a, const T &b) { return b > a ? a = b, 1 : 0; }
template <typename T>
void sort_range(vector<T> &v, int l, int r) { sort(v.begin() + l, v.begin() + r + 1); }
template <typename T>
struct BIT1
{
    int n;
    vector<T> tr;
    BIT1(int n) : n(n), tr(n + 1) {}
    void add(int x, T v)
    {
        for (; x <= n; x += x & -x)
            tr[x] += v;
    }
    T sum(int x)
    {
        T r = 0;
        for (; x; x -= x & -x)
            r += tr[x];
        return r;
    }
    T range(int l, int r) { return sum(r) - sum(l - 1); }
};
template <typename T>
struct BIT2
{
    int n, m;
    vector<vector<T>> t1, t2, t3, t4;
    BIT2(int n_ = 0, int m_ = 0) { init(n_, m_); }
    void init(int n_, int m_)
    {
        n = n_;
        m = m_;
        t1.assign(n + 1, vector<T>(m + 1, T{}));
        t2.assign(n + 1, vector<T>(m + 1, T{}));
        t3.assign(n + 1, vector<T>(m + 1, T{}));
        t4.assign(n + 1, vector<T>(m + 1, T{}));
    }
    void _add(int x, int y, const T &v)
    {
        for (int i = x; i <= n; i += i & -i)
            for (int j = y; j <= m; j += j & -j)
            {
                t1[i][j] += v;
                t2[i][j] += v * x;
                t3[i][j] += v * y;
                t4[i][j] += v * x * y;
            }
    }
    void rangeAdd(int x1, int y1, int x2, int y2, const T &v)
    {
        _add(x1, y1, v);
        _add(x1, y2 + 1, -v);
        _add(x2 + 1, y1, -v);
        _add(x2 + 1, y2 + 1, v);
    }
    T prefixSum(int x, int y)
    {
        T r{};
        for (int i = x; i > 0; i -= i & -i)
            for (int j = y; j > 0; j -= j & -j)
                r += t1[i][j] * (x + 1) * (y + 1) - t2[i][j] * (y + 1) - t3[i][j] * (x + 1) + t4[i][j];
        return r;
    }
    T rangeSum(int x1, int y1, int x2, int y2)
    {
        if (x1 > x2 || y1 > y2)
            return T{};
        return prefixSum(x2, y2) - prefixSum(x1 - 1, y2) - prefixSum(x2, y1 - 1) + prefixSum(x1 - 1, y1 - 1);
    }
};
struct Random
{
    mt19937_64 rng;
    Random() : rng(chrono::steady_clock::now().time_since_epoch().count()) {}
    ull rand_ull(ull max_val = -1) { return rng() % (max_val + 1); }
    ll rand_ll(ll l, ll r) { return uniform_int_distribution<ll>(l, r)(rng); }
    int rand_int(int l, int r) { return uniform_int_distribution<int>(l, r)(rng); }
    double rand_db(double l, double r) { return uniform_real_distribution<double>(l, r)(rng); }
    bool rand_bool(double p = 0.5) { return bernoulli_distribution(p)(rng); }
    template <typename T>
    void shuffle(vector<T> &v) { std::shuffle(v.begin(), v.end(), rng); }
};
ll qmi(ll a, ll b, ll p)
{
    ll res = 1 % p;
    a %= p;
    while (b)
    {
        if (b & 1)
            res = res * a % p;
        a = a * a % p;
        b >>= 1;
    }
    return res;
}
/*

*/
int n, m;
int son[N][27], idx;
int cnt[N];
void insert(string s)
{
    int u = 0, p = 0;
    for (auto &ch : s)
    {
        u = ch - 'a';
        if (!son[p][u])
            son[p][u] = ++idx;
        p = son[p][u];
    }
    cnt[p]++;
}

int query(string s)
{
    int p = 0, u = 0;
    for (auto &ch : s)
    {
        u = ch - 'a';
        if (!son[p][u])
            return 0;
        p = son[p][u];
    }
    return cnt[p];
}

void solve()
{
    cin >> n >> m;
    int len = 0;
    rep(i, 1, n)
    {
        string s;
        cin >> s;
        insert(s);
        cmax(len, (int)s.size());
    }
    int l, r;
    cin >> l >> r;
    cout << idx * 2 - len << endl;
}

int main()
{
    IOS;

    int _ = 1;
    // cin>>_;//如果是多组数据
    while (_--)
    {
        solve();
    }
    return 0;
}

智乃的Notepad(Hard version)

和上面那个差不多，加点离线操作维护就好了
AC代码：

#include <bits/stdc++.h>
using namespace std;
// CJX__//
typedef long long ll; // 不开long long 见祖宗
typedef unsigned long long ull;
typedef __int128 i128;
typedef pair<int, int> PII;
typedef pair<ll, ll> PLL;
typedef vector<int> vi;
typedef vector<ll> vll;
typedef vector<double> vd;
typedef vector<PII> vPII;
#define IOS                  \
    ios::sync_with_stdio(0); \
    cin.tie(0);              \
    cout.tie(0);
#define debug(...) cout << "[debug] " #__VA_ARGS__ " = " << (__VA_ARGS__) << endl;
#define out(x) cout << ((x) ? "YES" : "NO") << endl
#define mod(x, P) (((x) % (P) + (P)) % (P))
#define endl '\n'
#define gcd __gcd
#define lc p << 1
#define rc p << 1 | 1
#define INF 0x3f3f3f3f3f3f3f3f
#define inf 0x3f3f3f3f
#define fi first
#define se second
#define all(x) x.begin(), x.end()
#define lowbit(x) ((x) & (-x))
#define rep(i, x, n) for (ll i = x; i <= n; i++)
#define dep(i, x, n) for (ll i = x; i >= n; i--)
#define mem(a, x) memset(a, x, sizeof a)
const double eps = 1e-5;
const int N = 1e5 + 10, M = 1e6 + 10, K = 26;
const ll MOD = 1e9 + 7, Md3 = 998244353, Md7 = 1e9 + 7, Md9 = 1e9 + 9;
const ll base1 = 131, base2 = 13331;
const int dx[8] = {-1, 0, 1, 0, -1, -1, 1, 1}, dy[8] = {0, 1, 0, -1, -1, 1, -1, 1};
const int ddx[8] = {1, 1, 2, 2, -1, -1, -2, -2}, ddy[8] = {2, -2, 1, -1, 2, -2, 1, -1};
template <typename T>
bool cmin(T &a, const T &b) { return b < a ? a = b, 1 : 0; }
template <typename T>
bool cmax(T &a, const T &b) { return b > a ? a = b, 1 : 0; }
template <typename T>
void sort_range(vector<T> &v, int l, int r) { sort(v.begin() + l, v.begin() + r + 1); }
template <typename T>
struct BIT1
{
    int n;
    vector<T> tr;
    BIT1(int n) : n(n), tr(n + 1) {}
    void add(int x, T v)
    {
        for (; x <= n; x += x & -x)
            tr[x] += v;
    }
    T sum(int x)
    {
        T r = 0;
        for (; x; x -= x & -x)
            r += tr[x];
        return r;
    }
    T range(int l, int r) { return sum(r) - sum(l - 1); }
};
template <typename T>
struct BIT2
{
    int n, m;
    vector<vector<T>> t1, t2, t3, t4;
    BIT2(int n_ = 0, int m_ = 0) { init(n_, m_); }
    void init(int n_, int m_)
    {
        n = n_;
        m = m_;
        t1.assign(n + 1, vector<T>(m + 1, T{}));
        t2.assign(n + 1, vector<T>(m + 1, T{}));
        t3.assign(n + 1, vector<T>(m + 1, T{}));
        t4.assign(n + 1, vector<T>(m + 1, T{}));
    }
    void _add(int x, int y, const T &v)
    {
        for (int i = x; i <= n; i += i & -i)
            for (int j = y; j <= m; j += j & -j)
            {
                t1[i][j] += v;
                t2[i][j] += v * x;
                t3[i][j] += v * y;
                t4[i][j] += v * x * y;
            }
    }
    void rangeAdd(int x1, int y1, int x2, int y2, const T &v)
    {
        _add(x1, y1, v);
        _add(x1, y2 + 1, -v);
        _add(x2 + 1, y1, -v);
        _add(x2 + 1, y2 + 1, v);
    }
    T prefixSum(int x, int y)
    {
        T r{};
        for (int i = x; i > 0; i -= i & -i)
            for (int j = y; j > 0; j -= j & -j)
                r += t1[i][j] * (x + 1) * (y + 1) - t2[i][j] * (y + 1) - t3[i][j] * (x + 1) + t4[i][j];
        return r;
    }
    T rangeSum(int x1, int y1, int x2, int y2)
    {
        if (x1 > x2 || y1 > y2)
            return T{};
        return prefixSum(x2, y2) - prefixSum(x1 - 1, y2) - prefixSum(x2, y1 - 1) + prefixSum(x1 - 1, y1 - 1);
    }
};
struct Random
{
    mt19937_64 rng;
    Random() : rng(chrono::steady_clock::now().time_since_epoch().count()) {}
    ull rand_ull(ull max_val = -1) { return rng() % (max_val + 1); }
    ll rand_ll(ll l, ll r) { return uniform_int_distribution<ll>(l, r)(rng); }
    int rand_int(int l, int r) { return uniform_int_distribution<int>(l, r)(rng); }
    double rand_db(double l, double r) { return uniform_real_distribution<double>(l, r)(rng); }
    bool rand_bool(double p = 0.5) { return bernoulli_distribution(p)(rng); }
    template <typename T>
    void shuffle(vector<T> &v) { std::shuffle(v.begin(), v.end(), rng); }
};
ll qmi(ll a, ll b, ll p)
{
    ll res = 1 % p;
    a %= p;
    while (b)
    {
        if (b & 1)
            res = res * a % p;
        a = a * a % p;
        b >>= 1;
    }
    return res;
}
/*

*/
int n, m;
int son[M][K], idx;
int lt[M];//表示 Trie 树上的节点 p，上一次是在处理第 i 个单词时被用到的
int f[N][21];
int lg[N];
ll ans[N];
vector<PII> q[N];//左边界 编号
string s[N];

void insert(string &s)
{
    int p = 0;
    for (auto c : s)
    {
        int u = c - 'a';
        if (!son[p][u])
            son[p][u] = ++idx;
        p = son[p][u];
    }
}

void init()
{
    lg[0] = -1;
    for (int i = 1; i <= n; i++)
    {
        lg[i] = lg[i >> 1] + 1;
    }

    for (int j = 1; j < 20; j++)
        for (int i = 1; i + (1 << j) - 1 <= n; i++)
            f[i][j] = max(f[i][j - 1], f[i + (1 << (j - 1))][j - 1]);
}

void solve()
{
    cin >> n >> m;

    rep(i, 1, n)
    {
        cin >> s[i];
        insert(s[i]);
        f[i][0] = s[i].size();
    }
    init();
    auto query = [&](int l, int r) -> int
    {
        int k = lg[r - l + 1];
        return max(f[l][k], f[r - (1 << k) + 1][k]);
    };
    rep(i, 1, m)
    {
        int l, r;
        cin >> l >> r;
        q[r].push_back({l, (int)i});
    }

    BIT1<int> BT(n);

    rep(i, 1, n)
    {
        int p = 0;
        for (auto c : s[i])
        {
            int u = c - 'a';
            p = son[p][u];

            if (lt[p])
                BT.add(lt[p], -1);
            BT.add(i, 1);

            lt[p] = i;
        }
        for (auto &x : q[i])
        {
            int l = x.fi;
            int id = x.se;
            int cnt = BT.range(l, i);
            int mlen = query(l, i);
            ans[id] = (ll)cnt * 2 - mlen;
        }
    }

    rep(i, 1, m) cout << ans[i] << endl;
}

int main()
{
    IOS;

    int _ = 1;
    // cin>>_;//如果是多组数据
    while (_--)
    {
        solve();
    }
    return 0;
}

先写到这里，歇一会。
谈谈心得吧，去年的这个时候写寒假训练营其实只会一些很入门的题目，虽然也有思维，但是那个时候在算法都不会几个的情况下还是很难坚持下去的，更别说去补题了。当学了一段时间之后我们发现有些题目在学了一些算法和思维的积累下还是会多一些想法的，现在的处境是学了很大一部分的算法，虽然会，但是不够精通，不去思考还是得忘记，然后其实以为学了很多，但是依然还有更多的算法和思想没有体验过。为什么我们一直会忘记了，我感觉很大一部分是没有经常去思考算法是怎么来的，其次就是太功利性的学习，对学习一种完成态度的心态。的确，在大学各种事情的打扰下，能够专心做出一些事情是很不错的，但是我们似乎从未认真享受过算法的每个问题，更像是当作任务，如果每次都记忆深刻写题解大概也不会那么多人倡导了吧，或许着就是写题解的意义吧。

2025牛客寒假训练营部分补题题解