Compare commits
2 commits
8eff17c1be
...
3c39a7e58f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3c39a7e58f | ||
|
|
e9b06b766e |
1 changed files with 32 additions and 21 deletions
|
|
@ -98,45 +98,54 @@ template <typename R> vector<R> poly_normalize(vector<R> &a) {
|
||||||
return vector(a.begin(), a.begin() + i + 1);
|
return vector(a.begin(), a.begin() + i + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Basic polynomial multiplication
|
// Computes basic multiplication, assuming result is initialized as 0 and has enough space
|
||||||
template <typename R> vector<R> poly_mult_basic(vector<R> &a, vector<R> &b) {
|
template <typename R> void poly_mult_basic_span(span<R> &a, span<R> &b, span<R> &result) {
|
||||||
if (a.size() == 0 && b.size() == 0)
|
|
||||||
return vector<R>(0);
|
|
||||||
auto res = vector<R>(a.size() + b.size() - 1, 0);
|
|
||||||
for (size_t i = 0; i < a.size(); i++) {
|
for (size_t i = 0; i < a.size(); i++) {
|
||||||
// Start with i 0s
|
// Add at i-th position
|
||||||
auto tmp = vector<R>(i, 0);
|
auto at_ith = result.subspan(i, b.size());
|
||||||
for (R bj : b) {
|
for (size_t j = 0; j < b.size(); j++) {
|
||||||
tmp.push_back(a[i] * bj);
|
at_ith[j] = at_ith[j] + a[i] * b[j];
|
||||||
}
|
}
|
||||||
res = poly_add(res, tmp);
|
|
||||||
}
|
}
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define THRESHOLD 32
|
// Basic polynomial multiplication.
|
||||||
// TODO Reduce allocations
|
template <typename R> vector<R> poly_mult_basic(vector<R> &a, vector<R> &b) {
|
||||||
|
if (a.empty() && b.empty())
|
||||||
|
return vector<R>(0);
|
||||||
|
|
||||||
|
vector<R> result = vector<R> (a.size() + b.size() - 1);
|
||||||
|
auto span_a = span(a);
|
||||||
|
auto span_b = span(b);
|
||||||
|
auto span_result = span(result);
|
||||||
|
poly_mult_basic_span(span_a, span_b, span_result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define THRESHOLD 16
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A step of the Karatsuba function.
|
* A step of the Karatsuba function.
|
||||||
|
*
|
||||||
|
* NOTE: interestingly, the basic case is quite a performance bottleneck.
|
||||||
|
* Hence, the basic case needs to be implemented well.
|
||||||
|
*
|
||||||
* @param deg_bnd power-of-2 degree bound
|
* @param deg_bnd power-of-2 degree bound
|
||||||
* @param buffer the buffer which is used only throughout the invocation
|
* @param buffer the buffer which is used only throughout the invocation
|
||||||
*/
|
*/
|
||||||
template <typename R>
|
template <typename R>
|
||||||
void poly_mult_Karatsuba_step(const size_t deg_bnd, span<R> &a, span<R> &b,
|
void poly_mult_Karatsuba_step(const size_t deg_bnd, span<R> &a, span<R> &b,
|
||||||
span<R> &result, span<R> &buffer) {
|
span<R> &result, span<R> &buffer) {
|
||||||
if (deg_bnd <= THRESHOLD) {
|
|
||||||
auto vec_a = vector(a.begin(), a.end());
|
|
||||||
auto vec_b = vector(b.begin(), b.end());
|
|
||||||
auto result_vec = poly_mult_basic(vec_a, vec_b);
|
|
||||||
copy(result_vec.begin(), result_vec.end(), result.begin());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Result may be reused, so this needs clearing
|
// Result may be reused, so this needs clearing
|
||||||
for (auto &entry : result)
|
for (auto &entry : result)
|
||||||
entry = 0;
|
entry = 0;
|
||||||
|
|
||||||
|
if (deg_bnd <= THRESHOLD) {
|
||||||
|
poly_mult_basic_span(a, b, result);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const auto next_bnd = deg_bnd >> 1;
|
const auto next_bnd = deg_bnd >> 1;
|
||||||
auto a0 = a.subspan(0, next_bnd);
|
auto a0 = a.subspan(0, next_bnd);
|
||||||
auto a1 = a.subspan(next_bnd, next_bnd);
|
auto a1 = a.subspan(next_bnd, next_bnd);
|
||||||
|
|
@ -265,5 +274,7 @@ int main() {
|
||||||
only_Karatsuba(65536);
|
only_Karatsuba(65536);
|
||||||
only_Karatsuba(131072);
|
only_Karatsuba(131072);
|
||||||
|
|
||||||
|
only_Karatsuba(1 << 20);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Reference in a new issue