On Android arm64-v8a, the following function:
// Row-wise log-softmax of a 2-D tensor: subtracts the maximum taken along
// axis 1 from the input, then subtracts the log of the sum (along axis 1) of
// the exponentials of the shifted values.
//
// matrix: input 2-D tensor; both reductions run along axis 1.
// Returns: an xt::xtensor<T, 2> built from `shifted - xt::log(sumExp)`.
//
// NOTE(review): three intermediates below are declared with `auto`, whereas
// the sibling logSoftmax2() assigns the same expressions to xt::xtensor<T, 2>.
// Presumably `auto` here deduces xtensor expression objects that are only
// evaluated when the return value is built, rather than evaluated tensors --
// confirm against the xtensor documentation on `auto` and lazy evaluation.
template <typename T>
xt::xtensor<T, 2> logSoftmax(const xt::xtensor<T, 2> &matrix)
{
// Maximum along axis 1, requested with keep_dims; stored in a concrete tensor.
xt::xtensor<T, 2> maxVals = xt::amax(matrix, {1}, xt::keep_dims);
// Input minus the maximum along axis 1 (type deduced, see NOTE above).
auto shifted = matrix - maxVals;
// Element-wise exponential of the shifted values (type deduced).
auto expVals = xt::exp(shifted);
// Sum of the exponentials along axis 1, requested with keep_dims (type deduced).
auto sumExp = xt::sum(expVals, {1}, xt::keep_dims);
// The conversion to the xt::xtensor<T, 2> return type happens here.
return shifted - xt::log(sumExp);
}
logSoftmax() does not produce correct results and sometimes crashes.
// Row-wise log-softmax of a 2-D tensor.
//
// Each step is stored in a concrete xt::xtensor<T, 2> before the next one
// uses it: the maximum along axis 1, the input shifted by that maximum, the
// exponentials of the shifted values, and their sum along axis 1.
//
// matrix: input 2-D tensor; both reductions run along axis 1 with keep_dims.
// Returns: the shifted values minus the log of the summed exponentials.
template <typename T>
xt::xtensor<T, 2> logSoftmax2(const xt::xtensor<T, 2> &matrix)
{
    using Tensor2D = xt::xtensor<T, 2>;

    // Maximum along axis 1.
    const Tensor2D rowMax = xt::amax(matrix, {1}, xt::keep_dims);

    // Input with the maximum subtracted.
    const Tensor2D centered = matrix - rowMax;

    // Exponentials of the shifted values, then their sum along axis 1.
    const Tensor2D exponentials = xt::exp(centered);
    const Tensor2D rowSumExp = xt::sum(exponentials, {1}, xt::keep_dims);

    return centered - xt::log(rowSumExp);
}
However, logSoftmax2(), which stores every intermediate in an explicit xt::xtensor<T, 2>, works properly.
CMakeLists.txt
...
# Build the `dp` library from dp.cpp.
add_library(dp dp.cpp)
# Link dp privately against xtensor, the xtensor::optimize and
# xtensor::use_xsimd targets, xtensor-blas, and BLAS.
target_link_libraries(dp PRIVATE
xtensor xtensor::optimize xtensor::use_xsimd xtensor-blas BLAS::BLAS)
if(ANDROID)
# Android builds: pass max-page-size=16384 to the linker (PUBLIC, so it also
# applies to targets that link dp).
target_link_options(dp PUBLIC -Wl,-z,max-page-size=16384)
else()
# Non-Android builds: compile with -march=native (PUBLIC, so it also applies
# to targets that link dp).
target_compile_options(dp PUBLIC -march=native)
endif()
...
The library was built with -DCMAKE_BUILD_TYPE=Release.
Note: when built with -DCMAKE_BUILD_TYPE=RelWithDebInfo, logSoftmax() also works properly.