Skip to content

x{OR,UN}M{QR,RQ,LQ,QL} optimal workspace size is always > 4096 #546

@christoph-conrads

Description

@christoph-conrads

Description

The functions multiplying elementary reflectors with matrices called

  • {S,D}ORMQR,
  • {S,D}ORMRQ,
  • {S,D}ORMLQ,
  • {S,D}ORMQL,
  • {C,Z}UNMQR,
  • {C,Z}UNMRQ,
  • {C,Z}UNMLQ,
  • {C,Z}UNMQL

return at least 4096 for the optimal workspace size even for dimensions as tiny as zero or one. My expectation is that the optimal workspace size is less than 4096 for tiny numbers of columns and rows. The behavior occurs for SORMQR because the optimal workspace size is computed as

LWKOPT = NW*NB + TSIZE

where TSIZE = LDT * NBMAX = 64 * 65 = 4160.

Relevant initialization code:

lapack/SRC/sormqr.f

Lines 185 to 187 in 2dafa3d

INTEGER NBMAX, LDT, TSIZE
PARAMETER ( NBMAX = 64, LDT = NBMAX+1,
$ TSIZE = LDT*NBMAX )

Workspace computation:

lapack/SRC/sormqr.f

Lines 241 to 249 in 2dafa3d

IF( INFO.EQ.0 ) THEN
*
* Compute the workspace requirements
*
NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K,
$ -1 ) )
LWKOPT = NW*NB + TSIZE
WORK( 1 ) = LWKOPT
END IF

Demo program in C++11 (compile with c++ -Wextra -Wall -pedantic -std=c++11 sormqr-workspace.cpp -llapack -L/path/to/lapack-build/lib, run with env LD_LIBRARY_PATH=/tmp/path/to/lapack-build/lib ./a.out):

#include <cassert>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

#include <algorithm>
#include <limits>

#define lapack_complex_float float _Complex
#define lapack_complex_double double _Complex

extern "C" {

typedef int32_t lapack_int;

void dormqr_(char const* side, char const* trans, lapack_int const* m,
    lapack_int const* n, lapack_int const* k, double const* A,
    lapack_int const* lda, double const* tau, double* C, lapack_int const* ldc,
    double* work, lapack_int const* lwork, lapack_int* info, size_t side_len,
    size_t trans_len);

void sormqr_(char const* side, char const* trans, lapack_int const* m,
    lapack_int const* n, lapack_int const* k, float const* A,
    lapack_int const* lda, float const* tau, float* C, lapack_int const* ldc,
    float* work, lapack_int const* lwork, lapack_int* info, size_t side_len,
    size_t trans_len);

void cunmqr_(char const* side, char const* trans, lapack_int const* m,
    lapack_int const* n, lapack_int const* k, lapack_complex_float const* A,
    lapack_int const* lda, lapack_complex_float const* tau,
    lapack_complex_float* C, lapack_int const* ldc, lapack_complex_float* work,
    lapack_int const* lwork, lapack_int* info, size_t side_len,
    size_t trans_len);

void zunmqr_(char const* side, char const* trans, lapack_int const* m,
    lapack_int const* n, lapack_int const* k, lapack_complex_double const* A,
    lapack_int const* lda, lapack_complex_double const* tau,
    lapack_complex_double* C, lapack_int const* ldc,
    lapack_complex_double* work, lapack_int const* lwork, lapack_int* info,
    size_t side_len, size_t trans_len);
}

int main()
{
    constexpr auto nan_s = std::numeric_limits<float>::quiet_NaN();
    constexpr auto nan_d = std::numeric_limits<double>::quiet_NaN();

    auto side = 'L';
    auto trans = 'N';
    auto a_s = float{ 1.0 };
    auto a_d = double{ 1.0 };
    auto a_c = std::complex<float>{ 1.0, 2.0 };
    auto a_z = std::complex<double>{ 1.0, 2.0 };
    auto tau_s = nan_s;
    auto tau_d = nan_d;
    auto tau_c = std::complex<float>{ 1.0, 2.0 };
    auto tau_z = std::complex<double>{ 1.0, 2.0 };
    auto c_s = float{ 1.0 };
    auto c_d = double{ 1.0 };
    auto c_c = std::complex<float>{ 1.0, 2.0 };
    auto c_z = std::complex<double>{ 1.0, 2.0 };
    auto work_s = nan_s;
    auto work_d = nan_d;
    auto work_c = std::complex<float>{ nan_s, nan_s };
    auto work_z = std::complex<double>{ nan_s, nan_s };
    auto lwork = lapack_int{ -1 };
    auto info = lapack_int{ -1 };
    auto side_len = std::size_t{ 1 };
    auto trans_len = std::size_t{ 1 };

    for (auto m = lapack_int{ 0 }; m < 3; ++m) {
        auto lda = std::max(m, lapack_int{ 1 });
        auto ldc = std::max(m, lapack_int{ 1 });
        for (auto n = lapack_int{ 0 }; n < 3; ++n) {
            auto k_max = side == 'L' ? m : n;
            for (auto k = lapack_int{ 0 }; k <= k_max; ++k) {
                sormqr_(&side, &trans, &m, &n, &k, &a_s, &lda, &tau_s, &c_s,
                    &ldc, &work_s, &lwork, &info, side_len, trans_len);

                auto opt_s = work_s;

                if (info) {
                    fprintf(stderr, "sormqr m=%d n=%d k=%d returned info=%d\n",
                        m, n, k, info);
                } else {
                    printf("sormqr m=%d n=%d k=%d work=%g\n", m, n, k, work_s);
                }

                dormqr_(&side, &trans, &m, &n, &k, &a_d, &lda, &tau_d, &c_d,
                    &ldc, &work_d, &lwork, &info, side_len, trans_len);

                if (info) {
                    fprintf(stderr, "dormqr m=%d n=%d k=%d returned info=%d\n",
                        m, n, k, info);
                } else if (opt_s != work_d) {
                    printf("dormqr m=%d n=%d k=%d work=%g\n", m, n, k, work_d);
                }

                cunmqr_(&side, &trans, &m, &n, &k, (lapack_complex_float*)&a_c,
                    &lda, (lapack_complex_float*)&tau_c,
                    (lapack_complex_float*)&c_c, &ldc,
                    (lapack_complex_float*)&work_c, &lwork, &info, side_len,
                    trans_len);
                if (info) {
                    fprintf(stderr, "cunmqr m=%d n=%d k=%d returned info=%d\n",
                        m, n, k, info);
                } else if(opt_s != std::real(work_c)) {
                    printf("cunmqr m=%d n=%d k=%d work=%g\n", m, n, k,
                        std::real(work_c));
                }

                zunmqr_(&side, &trans, &m, &n, &k, (lapack_complex_double*)&a_z,
                    &lda, (lapack_complex_double*)&tau_z,
                    (lapack_complex_double*)&c_z, &ldc,
                    (lapack_complex_double*)&work_z, &lwork, &info, side_len,
                    trans_len);
                if (info) {
                    fprintf(stderr, "zunmqr m=%d n=%d k=%d returned info=%d\n",
                        m, n, k, info);
                } else if(opt_s != std::real(work_z)) {
                    printf("zunmqr m=%d n=%d k=%d work=%g\n", m, n, k,
                        std::real(work_z));
                }
            }
        }
    }
}

SORMRQ, SORMQL return 1 as the optimal workspace size if m == 0 or n == 0. Not all parameter combinations were tested.

Checklist

  • I've included a minimal example to reproduce the issue
  • I'd be willing to make a PR to solve this issue [Sure -- if I knew where to start. Strictly speaking the code is working fine.]

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions