Compare commits

...

2 commits

Author SHA1 Message Date
Abastro
3c39a7e58f More edit 2025-03-25 22:57:59 +09:00
Abastro
e9b06b766e Eliminated multiplications 2025-03-25 22:50:49 +09:00

View file

@ -98,45 +98,54 @@ template <typename R> vector<R> poly_normalize(vector<R> &a) {
return vector(a.begin(), a.begin() + i + 1);
}
// Basic polynomial multiplication
template <typename R> vector<R> poly_mult_basic(vector<R> &a, vector<R> &b) {
if (a.size() == 0 && b.size() == 0)
return vector<R>(0);
auto res = vector<R>(a.size() + b.size() - 1, 0);
// Computes basic multiplication, assuming result is initialized as 0 and has enough space
template <typename R> void poly_mult_basic_span(span<R> &a, span<R> &b, span<R> &result) {
for (size_t i = 0; i < a.size(); i++) {
// Start with i 0s
auto tmp = vector<R>(i, 0);
for (R bj : b) {
tmp.push_back(a[i] * bj);
// Add at i-th position
auto at_ith = result.subspan(i, b.size());
for (size_t j = 0; j < b.size(); j++) {
at_ith[j] = at_ith[j] + a[i] * b[j];
}
res = poly_add(res, tmp);
}
return res;
}
#define THRESHOLD 32
// TODO Reduce allocations
// Basic (schoolbook) polynomial multiplication.
//
// Allocates the output and delegates the accumulation to
// poly_mult_basic_span.
//
// @param a coefficients of the first polynomial
// @param b coefficients of the second polynomial
// @return a vector of a.size() + b.size() - 1 coefficients, or an empty
//         vector when both inputs are empty
template <typename R> vector<R> poly_mult_basic(vector<R> &a, vector<R> &b) {
  // Guard first: with both inputs empty the unsigned length below would be
  // 0 - 1 and wrap around.
  if (a.empty() && b.empty())
    return vector<R>(0);
  // poly_mult_basic_span adds into its output and assumes it starts at 0, so
  // fill with an explicit 0 instead of relying on R's default constructor.
  auto result = vector<R>(a.size() + b.size() - 1, 0);
  // The helper takes its spans by non-const lvalue reference, so each one
  // needs a named local rather than a temporary.
  auto span_a = span(a);
  auto span_b = span(b);
  auto span_result = span(result);
  poly_mult_basic_span(span_a, span_b, span_result);
  return result;
}
#define THRESHOLD 16
/**
* A step of the Karatsuba function.
*
* NOTE: interestingly, the basic case is quite a performance bottleneck.
* Hence, the basic case needs to be implemented well.
*
* @param deg_bnd power-of-2 degree bound
* @param buffer scratch buffer used only for the duration of this invocation
*/
template <typename R>
void poly_mult_Karatsuba_step(const size_t deg_bnd, span<R> &a, span<R> &b,
span<R> &result, span<R> &buffer) {
if (deg_bnd <= THRESHOLD) {
auto vec_a = vector(a.begin(), a.end());
auto vec_b = vector(b.begin(), b.end());
auto result_vec = poly_mult_basic(vec_a, vec_b);
copy(result_vec.begin(), result_vec.end(), result.begin());
return;
}
// Result may be reused, so this needs clearing
for (auto &entry : result)
entry = 0;
if (deg_bnd <= THRESHOLD) {
poly_mult_basic_span(a, b, result);
return;
}
const auto next_bnd = deg_bnd >> 1;
auto a0 = a.subspan(0, next_bnd);
auto a1 = a.subspan(next_bnd, next_bnd);
@ -265,5 +274,7 @@ int main() {
only_Karatsuba(65536);
only_Karatsuba(131072);
only_Karatsuba(1 << 20);
return 0;
}