报社天狗解题报告

简要题意

给定数组

\set {a_i},\set{b_i}

且初始全为

1

，数组大小均为

N

。

T

组操作如下：

单点修改 $a_i$ 或 $b_i$ ，其中 $i \in [1,N]\cap \mathbb Z$
每次查询给定 $n$ ，查询该式： $\sum\limits_{i=1}^{n}a_i\sum\limits_{j=\mathrm d(i)}^{\lfloor\frac ni\rfloor-1}b_j$ ，其中 $\mathrm d(i)$ 表示 $i$ 的因子数

N \leq 10^6,\;T \leq 10^5

。

约定与性质

为了方便处理，我们将

b

数组向上平移

1

，即求

\sum\limits_{i=1}^{n}a_i\sum\limits_{j=\mathrm d(i)+1}^{\lfloor\frac ni\rfloor}b_j

。

有一个常数是

10^6

以内

\mathrm d(i)

最大为

240

。

算法一

考虑答案从

n-1

到

n

的变化量，处理出来。时间复杂度

\mathcal{O(n\log n)}

。

期望得分：

10

pts。

算法二

对于 subtest 2，转化题意为给定序列

a_{1,\dots, n},b_{1,\dots,n}

，二元组集合

\mathbf E

，其中

|\mathbf E|=\mathcal O(n\log n)

。单点修改，全局查询

\sum\limits_{(x,y)\in E}a_xb_y

。使用根号分治可以做到

T\sqrt {n\log n}

。也可以使用询问分块，时间复杂度相同，存在在线方式。也可以转化题意成

\sum\limits_{i=1}^{n}a_ib_j[l_i\leq j \leq r_i]

，然后使用 k-d tree 做，时间复杂度

\mathcal{O}(n\log n + T\sqrt{n})

。

该档期望得分：

20

pts，结合 subtest 1 可获得

30

pts。

算法三

考虑分块打表后使用解法 1 中的递推求解。令块长为

{D}

，则有递推部分复杂度为

\mathcal{O}( {D} \log {D})

。但由于打表的值会改变，考虑直接维护其值。变换式子形式为

\sum_{ij\leq n}^{}a_ib_j\left[\mathrm{d}(i) < j\right]

。转化一下，变成

\sum_{i=1}^{n}\left[\left\lfloor\dfrac{n}{i}\right\rfloor \geq \mathrm d(i)\right]a_i\sum_{j=1}^{\lfloor\frac{n}{i}\rfloor}b_j-\sum_{i=1}^{n}\left[i\leq \left\lfloor\dfrac{n}{\mathrm{d}(i)}\right\rfloor\right]a_i\sum_{j=1}^{\mathrm{d}(i)}b_j

前半部分因为对于每一个表，它的数论分块是静态的，直接维护每一块查询的

a

的和的值；对于后半部分，我们考虑枚举

\mathrm{d}(i)

的值，用 vector 存储对应的

i

，二分找到位置树状数组维护即可，时间复杂度

\mathcal{O}(\sum_{i=1}^{240}\log{sizd_i}) \approx \mathcal{O}(\sqrt{n})

。平衡一下可以做到

\mathcal{O}(n\sqrt{n\log n})

。

该档期望得分：

30

pts（大概不卡常吧），结合以上可得

60

pts。

算法四

令

\mathtt{sumb}(n) = \sum_{i=1}^{n}b_i

。

然后，我们变换一下式子形态

\begin{aligned} \sum_{i=1}^{n}a_i\sum_{j=\mathrm d(i) + 1}^{\lfloor\frac ni\rfloor}b_j&=\sum_{i=1}^{n}a_i\left[\left\lfloor\dfrac{n}{i}\right\rfloor \geq \mathrm d(i)\right]\left(\mathtt{sumb}\left(\left\lfloor\dfrac{n}{i}\right\rfloor\right)-\mathtt{sumb}(\mathrm d(i))\right)\\ &=\sum_{i=1}^{n}a_i\left[i\times \mathrm d(i)\leq n\right]\left(\mathtt{sumb}\left(\left\lfloor\dfrac{n}{i}\right\rfloor\right)-\mathtt{sumb}(\mathrm d(i))\right)\\ \end{aligned}

这时，对于满足

i\times\mathrm d(i)

在

10^6

范围内的

i

，我们将

i

放入

\mathbf D_{d(i)}

集合中。而

\mathbf D

中只有少部分集合元素很多，其余的都很少。这里我们如果以元素数量是否大于

100

来断言少与多的话，则集合元素很多的集合只有

19

个，而集合元素很少的集合的元素总数量只有

187

。

我们令

K=19,P=187

。

我们定义

\mathbf E

为集合元素很多的集合的集合，

\mathbf F

为集合元素很少的集合的集合。

于是我们分为两部分讨论。对于集合元素很少的集合我们直接暴力枚举判断即可。而另外一部分，我们可以先数论分块，令目前的

\left\lfloor\dfrac{n}{i}\right\rfloor

的值为

v

，即只需要查询区间中

\mathbf D_{\mathrm d(i)} \in \mathbf E,\mathrm d(i)\leq v

的

i

的数量与

\mathtt{sumb}(\mathrm d(i))

的和。查询需要做到

\mathcal O(1)

。

考虑维护上面我们需要查询的东西。尽管我们查询的是二维前缀和，但有一维很小（只有

19

），我们可以在修改时对于每一行修改，这样就没有影响了。这样，只需要我们对于每一行维护一个

\mathcal O(\sqrt[3] n)\to\mathcal O(1)

的分块即可。这样我们每次修改就是

\mathcal O(K\sqrt[3] n)

了。当然，我们也需要一个

\mathcal O(\sqrt n)\to\mathcal O(1)

的分块维护

b

的前缀和。

总时间复杂度为

\mathcal O(n+T(\sqrt n + K\sqrt [3]n + P))

，空间复杂度为

\mathcal O(Kn)

。

期望得分：

100

pts。

Code

此代码未经卡常，不是最后一版。

CPP

#include <bits/stdc++.h>
using namespace std;

using ci = const int;

using u32 = uint32_t;
using i64 = int64_t;
using u64 = uint64_t;

template <class T>
inline void Max(T &x, const T &y) noexcept {
  if (x < y) x = y;
}
template <class T>
inline void Min(T &x, const T &y) noexcept {
  if (y < x) x = y;
}

/**
 * @brief Fast input & output
 * @param read Input
 */
namespace IO {

template <class T>
void read(T &x) {
  x = 0;
  static char ch;
  bool sm(0);
  while (!isdigit(ch = getchar())) sm = ch == '-';
  while (x = 10 * x + (ch ^ 48), isdigit(ch = getchar()));
  if (sm) x = -x;
}

template <class T>
void write(T x) {
  if (x < 0) putchar('-'), x = -x;
  static char q[40];
  char *qt(q);
  do *qt++ = x % 10 ^ 48;
  while (x /= 10);
  do putchar(*--qt);
  while (qt != q);
}

template <class T, class... Ts>
void read(T &x, Ts &...rest) {
  read(x);
  read(rest...);
}

}  // namespace IO
using IO::read;
using IO::write;

const int N = 1e6 + 5;
const int MaxD = 240;
const int MasterD = 13;
const int CutNumber = 200;

vector<int> vd[MaxD + 5];  // sets with the same number of factors
vector<int> ld;            // numbers with fewer factors

bool tong[N];
int pri[N], d[N];
int a[N], b[N];

// numbers with more factors
int md[MasterD], md_t, md_i[MaxD + 5];

/**
 * @brief to speed up the change of add on array
 * @param l the begin of the array
 * @param r the end of the array
 * @param v the value of the addition
 */
void AddArray(i64 *l, i64 *r, ci &v) noexcept {
#define did(i) (l[i] += v)
#define _(s) \
  case s:    \
    did(s - 1);
  while (l < r) {
    switch (r - l) {
      default:
        did(7);
        _(7) _(6) _(5) _(4) _(3) _(2) _(1)
    }
    l += 8;
  }
#undef _
#undef did
}

/**
 * @brief O(sqrt n) -> O(1)
 * @details Only for array B
 */
struct Block2 {
  i64 b1[1 << 10], b2[1 << 20];

  void add(int x, int y) noexcept {
    AddArray(b1 + (x >> 10), b1 + (1 << 10), y);
    AddArray(b2 + x, b2 + (x >> 10 << 10) + (1 << 10), y);
  }

  i64 ask(int x) noexcept {
    return ((x >> 10) ? b1[(x >> 10) - 1] : 0) + b2[x];
  }
};

/**
 * @brief O(sqrt[3] n) -> O(1)
 * @details Only for array A
 */
struct Block3 {
  static const int B = 127, BB = (1 << 14) - (1 << 7);
  i64 b1[1 << 7], b2[1 << 14], b3[1 << 21];

  void add(int x, int y) noexcept {
    AddArray(b1 + (x >> 14), b1 + (1 << 7), y);
    AddArray(b2 + (x >> 7), b2 + ((x >> 7) & BB) + (1 << 7), y);
    AddArray(b3 + x, b3 + (x >> 7 << 7) + (1 << 7), y);
  }

  i64 ask(int x) noexcept {
    return ((x >> 14) ? b1[(x >> 14) - 1] : 0) +
           ((x >> 7) & B ? b2[(x >> 7) - 1] : 0) + b3[x];
  }
};

int mxd[N];

// to initialize something about the factors.
void init_d(ci n) {
  memset(md_i, 0xff, sizeof md_i);

  d[1] = 1;
  int *pt = pri;
  for (int i = 2; i <= n; ++i) {
    if (!tong[i]) *pt++ = i, d[i] = 2;
    for (int *j = pri; i * *j <= n; ++j) {
      tong[i * *j] = 1;
      d[i * *j] = d[i] << 1;
      if (!(i % *j)) {
        d[i * *j] -= d[i / *j];
        break;
      }
    }
  }

  for (int i = 1; i <= n; ++i)
    if (i * d[i] <= n) vd[d[i]].push_back(i);
  for (int i = 1; i <= MaxD; ++i) {
    if (vd[i].size() > CutNumber) {
      md_i[i] = md_t, md[md_t++] = i;
    } else {
      for (ci &x : vd[i]) ld.push_back(x);
    }
  }
  for (int i = 1; i <= MaxD; ++i)
    if (!~md_i[i]) md_i[i] = md_i[i - 1];
  for (int i = 1; i <= n; ++i) mxd[i] = max(mxd[i - 1], d[i]);
}

Block2 B;
Block3 A2, A[MasterD];

void changeA(int x, int y) noexcept {
  if (x > N - 5) return;
  // delta of a[x]
  int dlt = y - a[x];
  a[x] = y;
  if (x * d[x] > N - 5) return;

  A2.add(x, dlt);
  if (md_i[d[x]] != md_i[d[x] - 1]) {
    for (int i = md_i[d[x]]; i != md_t; ++i) A[i].add(x, dlt);
  }
}

void changeB(int x, int y) noexcept {
  if (++x > N - 5) return;
  B.add(x, y - b[x]);
  b[x] = y;
}

i64 query(int n) noexcept {
  i64 ans(0);
  static Block3 *_A;
  int m = n / mxd[n];
  for (int l = m + 1, r; l <= n; l = r + 1) {
    r = n / (n / l);
    int t = md_i[min(MaxD, n / l)];
    if (!~t) continue;
    _A = &A[t];
    ans += (_A->ask(r) - _A->ask(l - 1)) * B.ask(n / l);
  }
  for (int i = 0; i != md_t; ++i)
    ans -= (A[i].ask(n / md[i]) - (i ? A[i - 1].ask(n / md[i]) : 0)) *
           B.ask(md[i]);
  for (ci &i : ld)
    if (i * d[i] <= n) {
      if (i > m)
        ans += a[i] * (B.ask(n / i) - B.ask(d[i]));
      else
        ans -= a[i] * B.ask(d[i]);
    }

  for (int l = 1, r; l <= m; l = r + 1) {
    r = min(n / (n / l), m);
    ans += B.ask(n / l) * (A2.ask(r) - A2.ask(l - 1));
  }
  return ans;
}

int main() {
  init_d(N - 5);
  for (int i = 1; i <= N - 5; ++i) changeA(i, 1), b[i] = 1;
  for (int i = 0; i < (1 << 10); ++i) {
    B.b1[i] = ((i + 1) << 10);
    iota(B.b2 + (i << 10), B.b2 + (i << 10) + (1 << 10), 1);
  }

  int Q;
  read(Q);
  while (Q--) {
    static int opt, n, x, y;
    read(opt);
    switch (opt) {
      case 1:
        read(n);
        write(query(n));
        putchar('\n');
        break;
      case 2:
        read(x, y);
        changeA(x, y);
        break;
      case 3:
        read(x, y);
        changeB(x, y);
        break;
    }
  }

  return 0;
}

常数优化

查询的前面很大一部分是一定满足条件的，而后半部分的询问数较少，分组计算即可。

bonus

该题的瓶颈在于

\sum\limits_{i\times j \leq n}a_i b_j

的维护，单次查询复杂度

\mathcal O(\sqrt n)

，可否优化？

报社天狗解题报告

文章操作

简要题意

约定与性质

算法一

算法二

算法三

算法四

Code

常数优化

bonus

相关推荐

评论

报社天狗解题报告