diff --git a/quest/include/operations.h b/quest/include/operations.h index 0898165af..0fb06560f 100644 --- a/quest/include/operations.h +++ b/quest/include/operations.h @@ -89,6 +89,7 @@ extern "C" { * - getCompMatr1() * - getInlineCompMatr1() * - applyCompMatr1() + * - postMultiplyCompMatr1() * - applyQubitProjector() * - multiplyCompMatr() * @author Tyson Jones @@ -96,6 +97,57 @@ extern "C" { void multiplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix); +/** @notyettested + * + * Multiplies a general one-qubit dense @p matrix upon the specified @p target + * qubit of the density matrix @p qureg, from the right-hand side. + * + * @formulae + * Let @f$ \dmrho = @f$ @p qureg, @f$ \hat{M} = @f$ @p matrix and @f$ t = @f$ @p target, + * and notate @f$\hat{M}_t@f$ as per applyCompMatr1(). Unlike applyCompMatr1() however, + * this function only ever right-multiplies @p matrix upon @p qureg. + * + * Explicitly + * @f[ + \dmrho \rightarrow \dmrho \, \hat{M}_t + * @f] + * where @f$ \hat{M} @f$ is not conjugated nor transposed, and there are no additional + * constraints like unitarity. + * + * In general, this function will break the normalisation of @p qureg and result in a + * non-physical state, and is useful for preparing sub-expressions of formulae like + * the Linbladian. + * + * @myexample + * ``` + Qureg qureg = createDensityQureg(5); + + CompMatr1 matrix = getInlineCompMatr1({ + {0.1, 0.2}, + {0.3i, 0.4i} + }); + + postMultiplyCompMatr1(qureg, 2, matrix); + * ``` + * + * @param[in,out] qureg the state to modify. + * @param[in] target the index of the target qubit. + * @param[in] matrix the Z-basis matrix to post-multiply. + * @throws @validationerror + * - if @p qureg or @p matrix are uninitialised. + * - if @p qureg is not a density matrix. + * - if @p target is an invalid qubit index. 
+ * @see + * - getCompMatr1() + * - getInlineCompMatr1() + * - applyCompMatr1() + * - multiplyCompMatr1() + * - multiplyCompMatr() + * @author Tyson Jones + */ +void postMultiplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix); + + /** Applies a general one-qubit dense unitary @p matrix to the specified @p target * qubit of @p qureg. * @@ -162,6 +214,7 @@ digraph { * - getCompMatr1() * - getInlineCompMatr1() * - multiplyCompMatr1() + * - postMultiplyCompMatr1() * - applyControlledCompMatr1() * - applyCompMatr2() * - applyCompMatr() @@ -346,6 +399,14 @@ extern "C" { void multiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matr); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +/// @see +/// - postMultiplyCompMatr1 +void postMultiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix); + + /** @notyetdoced * * Applies a general two-qubit dense unitary @p matrix to qubits @p target1 and @@ -557,6 +618,14 @@ extern "C" { void multiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +/// @see +/// - postMultiplyCompMatr1 +void postMultiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix); + + /** @notyetdoced * * @formulae @@ -612,6 +681,14 @@ void applyMultiStateControlledCompMatr(Qureg qureg, int* controls, int* states, void multiplyCompMatr(Qureg qureg, std::vector targets, CompMatr matr); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +/// @cppvectoroverload +/// @see postMultiplyCompMatr() +void postMultiplyCompMatr(Qureg qureg, std::vector targets, CompMatr matr); + + /// @notyettested /// @notyetvalidated /// @notyetdoced @@ -667,6 +744,12 @@ extern "C" { void multiplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matr); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +void postMultiplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix); + + /// @notyetdoced /// @see applyCompMatr1() void 
applyDiagMatr1(Qureg qureg, int target, DiagMatr1 matr); @@ -734,6 +817,12 @@ extern "C" { void multiplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matr); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +void postMultiplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix); + + /// @notyetdoced /// @see applyCompMatr1() void applyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matr); @@ -801,6 +890,12 @@ extern "C" { void multiplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +void postMultiplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix); + + /// @notyetdoced /// @see applyCompMatr1() void applyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix); @@ -828,6 +923,12 @@ void applyMultiStateControlledDiagMatr(Qureg qureg, int* controls, int* states, void multiplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr matrix, qcomp exponent); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +void postMultiplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr matrix, qcomp exponent); + + /** @notyetdoced * * @formulae @@ -874,6 +975,14 @@ void applyMultiStateControlledDiagMatrPower(Qureg qureg, int* controls, int* sta void multiplyDiagMatr(Qureg qureg, std::vector targets, DiagMatr matrix); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +/// @cppvectoroverload +/// @see postMultiplyDiagMatr() +void postMultiplyDiagMatr(Qureg qureg, std::vector targets, DiagMatr matrix); + + /// @notyettested /// @notyetvalidated /// @notyetdoced @@ -914,6 +1023,14 @@ void applyMultiStateControlledDiagMatr(Qureg qureg, std::vector controls, s void multiplyDiagMatrPower(Qureg qureg, std::vector targets, DiagMatr matrix, qcomp exponent); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +/// @cppvectoroverload +/// @see postMultiplyDiagMatrPower() +void 
postMultiplyDiagMatrPower(Qureg qureg, std::vector targets, DiagMatr matrix, qcomp exponent); + + /// @notyettested /// @notyetvalidated /// @notyetdoced @@ -979,6 +1096,18 @@ void multiplyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix); void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix); + + +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent); + + /// @notyetdoced /// @notyetvalidated void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix); @@ -1143,6 +1272,12 @@ extern "C" { void multiplySwap(Qureg qureg, int qubit1, int qubit2); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplySwap(Qureg qureg, int qubit1, int qubit2); + + /** Applies a SWAP gate between @p qubit1 and @p qubit2 of @p qureg. 
* * @diagram @@ -1264,20 +1399,41 @@ extern "C" { /// @notyetdoced +/// @notyettested /// @see multiplyCompMatr1() void multiplyPauliX(Qureg qureg, int target); /// @notyetdoced +/// @notyettested /// @see multiplyCompMatr1() void multiplyPauliY(Qureg qureg, int target); /// @notyetdoced +/// @notyettested /// @see multiplyCompMatr1() void multiplyPauliZ(Qureg qureg, int target); +/// @notyetdoced +/// @notyettested +/// @see postMultiplyCompMatr1() +void postMultiplyPauliX(Qureg qureg, int target); + + +/// @notyetdoced +/// @notyettested +/// @see postMultiplyCompMatr1() +void postMultiplyPauliY(Qureg qureg, int target); + + +/// @notyetdoced +/// @notyettested +/// @see postMultiplyCompMatr1() +void postMultiplyPauliZ(Qureg qureg, int target); + + /// @notyetdoced void applyPauliX(Qureg qureg, int target); @@ -1408,6 +1564,12 @@ extern "C" { void multiplyPauliStr(Qureg qureg, PauliStr str); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyPauliStr(Qureg qureg, PauliStr str); + + /// @notyetdoced void applyPauliStr(Qureg qureg, PauliStr str); @@ -1796,6 +1958,12 @@ extern "C" { void multiplyPauliGadget(Qureg qureg, PauliStr str, qreal angle); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyPauliGadget(Qureg qureg, PauliStr str, qreal angle); + + /** @notyetdoced * * @formulae @@ -1929,6 +2097,12 @@ extern "C" { void multiplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle); + + /** @notyetdoced * * @formulae @@ -2201,6 +2375,14 @@ void applyMultiQubitPhaseShift(Qureg qureg, int* targets, int numTargets, qreal void multiplyPhaseGadget(Qureg qureg, std::vector targets, qreal angle); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +/// @cppvectoroverload +/// @see postMultiplyPhaseGadget() +void postMultiplyPhaseGadget(Qureg qureg, 
std::vector targets, qreal angle); + + /// @notyettested /// @notyetvalidated /// @notyetdoced @@ -2273,6 +2455,12 @@ extern "C" { void multiplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace); + + /** @notyettested * * Effects (an approximation to) the exponential of @p sum, weighted by @p angle, upon @p qureg, @@ -2558,6 +2746,12 @@ extern "C" { void multiplyMultiQubitNot(Qureg qureg, int* targets, int numTargets); +/// @notyetdoced +/// @notyettested +/// @notyetvalidated +void postMultiplyMultiQubitNot(Qureg qureg, int* targets, int numTargets); + + /// @notyetdoced void applyMultiQubitNot(Qureg qureg, int* targets, int numTargets); @@ -2592,6 +2786,14 @@ void applyMultiStateControlledMultiQubitNot(Qureg qureg, int* controls, int* sta void multiplyMultiQubitNot(Qureg qureg, std::vector targets); +/// @notyettested +/// @notyetvalidated +/// @notyetdoced +/// @cppvectoroverload +/// @see postMultiplyMultiQubitNot() +void postMultiplyMultiQubitNot(Qureg qureg, std::vector targets); + + /// @notyettested /// @notyetvalidated /// @notyetdoced diff --git a/quest/src/api/operations.cpp b/quest/src/api/operations.cpp index ad31411c3..1b57f7d2a 100644 --- a/quest/src/api/operations.cpp +++ b/quest/src/api/operations.cpp @@ -59,6 +59,12 @@ void validateAndApplyAnyCtrlAnyTargUnitaryMatrix(Qureg qureg, int* ctrls, int* s ctrlVec = util_getBraQubits(ctrlVec, qureg); targVec = util_getBraQubits(targVec, qureg); localiser_statevec_anyCtrlAnyTargAnyMatr(qureg, ctrlVec, stateVec, targVec, matr, conj); + + /// @todo + /// the above logic always performs two in-turn operations upon density matrices, + /// though when matr is diagonal (DiagMatr*), they can be trivially combined into + /// a single operation which enumerates the state only once. We perform this + /// optimisation for FullStateDiagMatr elsewhere. 
Consider optimising here too! } @@ -75,7 +81,21 @@ void multiplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) { validate_matrixFields(matrix, __func__); // matrix can be non-unitary bool conj = false; - localiser_statevec_anyCtrlOneTargDenseMatr(qureg, {}, {}, target, matrix, conj); + bool transp = false; + localiser_statevec_anyCtrlOneTargDenseMatr(qureg, {}, {}, target, matrix, conj, transp); +} + +void postMultiplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_target(qureg, target, __func__); + validate_matrixFields(matrix, __func__); // matrix can be non-unitary + + // rho matrix ~ transpose(rho) (x) I ||rho>> + bool conj = false; + bool transp = true; + int qubit = util_getBraQubit(target, qureg); + localiser_statevec_anyCtrlOneTargDenseMatr(qureg, {}, {}, qubit, matrix, conj, transp); } void applyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) { @@ -126,7 +146,23 @@ void multiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix) validate_mixedAmpsFitInNode(qureg, 2, __func__); bool conj = false; - localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, target1, target2, matrix, conj); + bool transp = false; + localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, target1, target2, matrix, conj, transp); +} + +void postMultiplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_twoTargets(qureg, target1, target2, __func__); + validate_matrixFields(matrix, __func__); // matrix can be non-unitary + validate_mixedAmpsFitInNode(qureg, 2, __func__); + + // rho matrix ~ transpose(rho) (x) I ||rho>> + bool conj = false; + bool transp = true; + int qubit1 = util_getBraQubit(target1, qureg); + int qubit2 = util_getBraQubit(target2, qureg); + localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, qubit1, 
qubit2, matrix, conj, transp); } void applyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix) { @@ -181,7 +217,22 @@ void multiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix validate_mixedAmpsFitInNode(qureg, numTargets, __func__); bool conj = false; - localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, conj); + bool transp = false; + localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, conj, transp); +} + +void postMultiplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_targets(qureg, targets, numTargets, __func__); + validate_matrixDimMatchesTargets(matrix, numTargets, __func__); // also validates fields and is-sync, but not unitarity + validate_mixedAmpsFitInNode(qureg, numTargets, __func__); + + // rho matrix ~ transpose(rho) (x) I ||rho>> + bool conj = false; + bool transp = true; + auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg); + localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, qubits, matrix, conj, transp); } void applyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix) { @@ -211,6 +262,11 @@ void multiplyCompMatr(Qureg qureg, vector targets, CompMatr matr) { multiplyCompMatr(qureg, targets.data(), targets.size(), matr); } +void postMultiplyCompMatr(Qureg qureg, vector targets, CompMatr matr) { + + postMultiplyCompMatr(qureg, targets.data(), targets.size(), matr); +} + void applyCompMatr(Qureg qureg, vector targets, CompMatr matr) { applyCompMatr(qureg, targets.data(), targets.size(), matr); @@ -249,6 +305,17 @@ void multiplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) { localiser_statevec_anyCtrlOneTargDiagMatr(qureg, {}, {}, target, matrix, conj); } +void postMultiplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) { + 
validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_target(qureg, target, __func__); + validate_matrixFields(matrix, __func__); // matrix can be non-unitary + + bool conj = false; + int qubit = util_getBraQubit(target, qureg); + localiser_statevec_anyCtrlOneTargDiagMatr(qureg, {}, {}, qubit, matrix, conj); +} + void applyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) { validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, nullptr, nullptr, 0, &target, 1, matrix, __func__); @@ -299,6 +366,18 @@ void multiplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix) localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, {}, {}, target1, target2, matrix, conj); } +void postMultiplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_twoTargets(qureg, target1, target2, __func__); + validate_matrixFields(matrix, __func__); // matrix can be non-unitary + + bool conj = false; + int qubit1 = util_getBraQubit(target1, qureg); + int qubit2 = util_getBraQubit(target2, qureg); + localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, {}, {}, qubit1, qubit2, matrix, conj); +} + void applyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix) { int targs[] = {target1, target2}; @@ -351,7 +430,20 @@ void multiplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix bool conj = false; qcomp exponent = 1; - localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, exponent, conj); + auto qubits = util_getVector(targets, numTargets); + localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj); +} + +void postMultiplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_targets(qureg, targets, numTargets, 
__func__); + validate_matrixDimMatchesTargets(matrix, numTargets, __func__); // also validates fields and is-sync, but not unitarity + + bool conj = false; + qcomp exponent = 1; + auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg); + localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj); } void applyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix) { @@ -381,6 +473,11 @@ void multiplyDiagMatr(Qureg qureg, vector targets, DiagMatr matrix) { multiplyDiagMatr(qureg, targets.data(), targets.size(), matrix); } +void postMultiplyDiagMatr(Qureg qureg, vector targets, DiagMatr matrix) { + + postMultiplyDiagMatr(qureg, targets.data(), targets.size(), matrix); +} + void applyDiagMatr(Qureg qureg, vector targets, DiagMatr matrix) { applyDiagMatr(qureg, targets.data(), targets.size(), matrix); @@ -420,7 +517,20 @@ void multiplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr m validate_matrixExpIsNonDiverging(matrix, exponent, __func__); // harmlessly re-validates fields and is-sync bool conj = false; - localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, exponent, conj); + auto qubits = util_getVector(targets, numTargets); + localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj); +} + +void postMultiplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr matrix, qcomp exponent) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_targets(qureg, targets, numTargets, __func__); + validate_matrixDimMatchesTargets(matrix, numTargets, __func__); // also validates fields and is-sync, but not unitarity + validate_matrixExpIsNonDiverging(matrix, exponent, __func__); // harmlessly re-validates fields and is-sync + + bool conj = false; + auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg); + 
localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj); } void applyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr matrix, qcomp exponent) { @@ -505,6 +615,11 @@ void multiplyDiagMatrPower(Qureg qureg, vector targets, DiagMatr matrix, qc multiplyDiagMatrPower(qureg, targets.data(), targets.size(), matrix, exponent); } +void postMultiplyDiagMatrPower(Qureg qureg, vector targets, DiagMatr matrix, qcomp exponent) { + + postMultiplyDiagMatrPower(qureg, targets.data(), targets.size(), matrix, exponent); +} + void applyDiagMatrPower(Qureg qureg, vector targets, DiagMatr matrix, qcomp exponent) { applyDiagMatrPower(qureg, targets.data(), targets.size(), matrix, exponent); @@ -548,12 +663,39 @@ void multiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp validate_matrixAndQuregAreCompatible(matrix, qureg, false, __func__); // matrix can be non-unitary validate_matrixExpIsNonDiverging(matrix, exponent, __func__); - bool onlyMultiply = true; + // rho -> matrix^exponent rho + bool leftMultiply = true; + bool rightMultiply = false; + bool rightConj = false; + (qureg.isDensityMatrix)? 
- localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, leftMultiply, rightMultiply, rightConj): localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } +void postMultiplyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_matrixFields(matrix, __func__); + validate_matrixAndQuregAreCompatible(matrix, qureg, false, __func__); // matrix can be non-unitary + + postMultiplyFullStateDiagMatrPower(qureg, matrix, 1); // harmlessly re-validates +} + +void postMultiplyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp exponent) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_matrixFields(matrix, __func__); + validate_matrixAndQuregAreCompatible(matrix, qureg, false, __func__); // matrix can be non-unitary + validate_matrixExpIsNonDiverging(matrix, exponent, __func__); + + // rho -> rho matrix^exponent + bool leftMultiply = false; + bool rightMultiply = true; + bool rightConj = false; + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, leftMultiply, rightMultiply, rightConj); +} + void applyFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matrix) { validate_quregFields(qureg, __func__); validate_matrixFields(matrix, __func__); @@ -571,9 +713,13 @@ void applyFullStateDiagMatrPower(Qureg qureg, FullStateDiagMatr matrix, qcomp ex validate_unitaryExponentIsReal(exponent, __func__); validate_matrixExpIsNonDiverging(matrix, exponent, __func__); - bool onlyMultiply = false; + // rho -> matrix^exponent rho conj(matrix^exponent) + bool leftMultiply = true; + bool rightMultiply = true; + bool rightConj = true; + (qureg.isDensityMatrix)? 
- localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, onlyMultiply): + localiser_densmatr_allTargDiagMatr(qureg, matrix, exponent, leftMultiply, rightMultiply, rightConj): localiser_statevec_allTargDiagMatr(qureg, matrix, exponent); } @@ -751,6 +897,16 @@ void multiplySwap(Qureg qureg, int qubit1, int qubit2) { localiser_statevec_anyCtrlSwap(qureg, {}, {}, qubit1, qubit2); } +void postMultiplySwap(Qureg qureg, int qubit1, int qubit2) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_twoTargets(qureg, qubit1, qubit2, __func__); + + qubit1 = util_getBraQubit(qubit1, qureg); + qubit2 = util_getBraQubit(qubit2, qureg); + localiser_statevec_anyCtrlSwap(qureg, {}, {}, qubit1, qubit2); +} + void applySwap(Qureg qureg, int qubit1, int qubit2) { validate_quregFields(qureg, __func__); validate_twoTargets(qureg, qubit1, qubit2, __func__); @@ -884,6 +1040,61 @@ void applyMultiStateControlledSqrtSwap(Qureg qureg, vector controls, vector extern "C" { +void multiplyPauliX(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_target(qureg, target, __func__); + + PauliStr str = getPauliStr("X", {target}); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); +} + +void multiplyPauliY(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_target(qureg, target, __func__); + + PauliStr str = getPauliStr("Y", {target}); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); +} + +void multiplyPauliZ(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_target(qureg, target, __func__); + + PauliStr str = getPauliStr("Z", {target}); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); +} + +void postMultiplyPauliX(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_target(qureg, target, __func__); + + PauliStr str = getPauliStr("X", {target}); + str = 
paulis_getShiftedPauliStr(str, qureg.numQubits); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); +} + +void postMultiplyPauliY(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_target(qureg, target, __func__); + + qcomp factor = -1; // undo transpose + PauliStr str = getPauliStr("Y", {target}); + str = paulis_getShiftedPauliStr(str, qureg.numQubits); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str, factor); +} + +void postMultiplyPauliZ(Qureg qureg, int target) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_target(qureg, target, __func__); + + PauliStr str = getPauliStr("Z", {target}); + str = paulis_getShiftedPauliStr(str, qureg.numQubits); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); +} + void applyPauliX(Qureg qureg, int target) { validate_quregFields(qureg, __func__); validate_target(qureg, target, __func__); @@ -1034,6 +1245,16 @@ void multiplyPauliStr(Qureg qureg, PauliStr str) { localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str); } +void postMultiplyPauliStr(Qureg qureg, PauliStr str) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_pauliStrTargets(qureg, str, __func__); + + qcomp factor = paulis_hasOddNumY(str)? 
-1 : 1; // undo transpose + str = paulis_getShiftedPauliStr(str, qureg.numQubits); + localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str, factor); +} + void applyPauliStr(Qureg qureg, PauliStr str) { validate_quregFields(qureg, __func__); validate_pauliStrTargets(qureg, str, __func__); @@ -1120,7 +1341,7 @@ void multiplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace) { localiser_statevec_setQuregToSuperposition(0, workspace, 1, qureg, 0, qureg); localiser_statevec_initUniformState(qureg, 0); - // apply each term in-turn, mixing into output qureg, then undo using idempotency + // left-multiply each term in-turn, mixing into output qureg, then undo using idempotency for (qindex i=0; i qureg, and qureg -> sum * qureg } +void postMultiplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace) { + validate_quregFields(qureg, __func__); + validate_quregFields(workspace, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_quregCanBeWorkspace(qureg, workspace, __func__); + validate_pauliStrSumFields(sum, __func__); + validate_pauliStrSumTargets(sum, qureg, __func__); + + // clone qureg to workspace, set qureg to blank + localiser_statevec_setQuregToSuperposition(0, workspace, 1, qureg, 0, qureg); + localiser_statevec_initUniformState(qureg, 0); + + // post-multiply each term in-turn, mixing into output qureg, then undo using idempotency + for (qindex i=0; i qureg, and qureg -> sum * qureg +} + void internal_applyFirstOrderTrotterRepetition( Qureg qureg, vector& ketCtrls, vector& braCtrls, vector& states, PauliStrSum sum, qcomp angle, bool reverse @@ -1496,6 +1742,17 @@ void multiplyPauliGadget(Qureg qureg, PauliStr str, qreal angle) { localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase); } +void postMultiplyPauliGadget(Qureg qureg, PauliStr str, qreal angle) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_pauliStrTargets(qureg, str, __func__); + + qreal factor = 
paulis_hasOddNumY(str)? -1 : 1; + qreal phase = factor * util_getPhaseFromGateAngle(angle); + str = paulis_getShiftedPauliStr(str, qureg.numQubits); + localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase); +} + void applyPauliGadget(Qureg qureg, PauliStr str, qreal angle) { validate_quregFields(qureg, __func__); validate_pauliStrTargets(qureg, str, __func__); @@ -1589,7 +1846,18 @@ void multiplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle) validate_targets(qureg, targets, numTargets, __func__); qreal phase = util_getPhaseFromGateAngle(angle); - localiser_statevec_anyCtrlPhaseGadget(qureg, {}, {}, util_getVector(targets,numTargets), phase); + auto qubits = util_getVector(targets, numTargets); + localiser_statevec_anyCtrlPhaseGadget(qureg, {}, {}, qubits, phase); +} + +void postMultiplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_targets(qureg, targets, numTargets, __func__); + + qreal phase = util_getPhaseFromGateAngle(angle); + auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg); + localiser_statevec_anyCtrlPhaseGadget(qureg, {}, {}, qubits, phase); } void applyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle) { @@ -1643,6 +1911,11 @@ void multiplyPhaseGadget(Qureg qureg, vector targets, qreal angle) { multiplyPhaseGadget(qureg, targets.data(), targets.size(), angle); } +void postMultiplyPhaseGadget(Qureg qureg, vector targets, qreal angle) { + + postMultiplyPhaseGadget(qureg, targets.data(), targets.size(), angle); +} + void applyPhaseGadget(Qureg qureg, vector targets, qreal angle) { applyPhaseGadget(qureg, targets.data(), targets.size(), angle); @@ -1763,7 +2036,18 @@ void multiplyMultiQubitNot(Qureg qureg, int* targets, int numTargets) { validate_targets(qureg, targets, numTargets, __func__); // harmlessly re-validates - multiplyPauliStr(qureg, 
getPauliStr(std::string(numTargets, 'X'), targets, numTargets)); + PauliStr str = getPauliStr(std::string(numTargets, 'X'), targets, numTargets); + multiplyPauliStr(qureg, str); +} + +void postMultiplyMultiQubitNot(Qureg qureg, int* targets, int numTargets) { + validate_quregFields(qureg, __func__); + validate_quregIsDensityMatrix(qureg, __func__); + validate_targets(qureg, targets, numTargets, __func__); + + // harmlessly re-validates + PauliStr str = getPauliStr(std::string(numTargets, 'X'), targets, numTargets); + postMultiplyPauliStr(qureg, str); } void applyMultiQubitNot(Qureg qureg, int* targets, int numTargets) { @@ -1809,6 +2093,11 @@ void multiplyMultiQubitNot(Qureg qureg, vector targets) { multiplyMultiQubitNot(qureg, targets.data(), targets.size()); } +void postMultiplyMultiQubitNot(Qureg qureg, vector targets) { + + postMultiplyMultiQubitNot(qureg, targets.data(), targets.size()); +} + void applyMultiQubitNot(Qureg qureg, vector targets) { applyMultiQubitNot(qureg, targets.data(), targets.size()); diff --git a/quest/src/core/accelerator.cpp b/quest/src/core/accelerator.cpp index 9f877fba3..5e3e40f55 100644 --- a/quest/src/core/accelerator.cpp +++ b/quest/src/core/accelerator.cpp @@ -63,6 +63,12 @@ using std::min; ((b2)? funcname : funcname)) +#define GET_CPU_OR_GPU_FOUR_BOOL_FUNC_OPTIMISED_FOR_FIRST_BOOL( isgpu, funcsuffix, value, fixed1,fixed2,fixed3 ) \ + ((isgpu)? \ + ((value)? gpu_##funcsuffix : gpu_##funcsuffix ) : \ + ((value)? cpu_##funcsuffix : cpu_##funcsuffix )) + + #if (MAX_OPTIMISED_NUM_CTRLS != 5) || (MAX_OPTIMISED_NUM_TARGS != 5) #error "The number of optimised, templated QuEST functions was inconsistent between accelerator's source and header." 
#endif @@ -108,9 +114,9 @@ using std::min; /// @todo -/// GET_CPU_OR_GPU_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS, -/// as defined below, is only ever called by used by anyCtrlAnyTargDenseMatr, -/// which only ever receives numTargs>=3 (due to accelerator redirecting +/// GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS as defined below +/// is used by anyCtrlAnyTargDiagMatr and anyCtrlAnyTargDenseMatr; the +/// latter only ever receives numTargs>=3 (due to accelerator redirecting /// fewer targets to faster bespoke functions which e.g. avoid global GPU /// cache emory access). This means its instantiation with numTargs=0,1,2 /// is useless, though contributes to 42% of the function's compilation @@ -118,38 +124,7 @@ using std::min; /// can ergo non-negligibly speed up compilation by avoiding these redundant /// instances at the cost of increased code complexity/asymmetry. Consider! -#define GET_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(f, numctrls, numtargs, c) \ - (vector { \ - CONJ_ARR(f) {&f<0,0,c>, &f<0,1,c>, &f<0,2,c>, &f<0,3,c>, &f<0,4,c>, &f<0,5,c>, &f<0,-1,c>}, \ - CONJ_ARR(f) {&f<1,0,c>, &f<1,1,c>, &f<1,2,c>, &f<1,3,c>, &f<1,4,c>, &f<1,5,c>, &f<1,-1,c>}, \ - CONJ_ARR(f) {&f<2,0,c>, &f<2,1,c>, &f<2,2,c>, &f<2,3,c>, &f<2,4,c>, &f<2,5,c>, &f<2,-1,c>}, \ - CONJ_ARR(f) {&f<3,0,c>, &f<3,1,c>, &f<3,2,c>, &f<3,3,c>, &f<3,4,c>, &f<3,5,c>, &f<3,-1,c>}, \ - CONJ_ARR(f) {&f<4,0,c>, &f<4,1,c>, &f<4,2,c>, &f<4,3,c>, &f<4,4,c>, &f<4,5,c>, &f<4,-1,c>}, \ - CONJ_ARR(f) {&f<5,0,c>, &f<5,1,c>, &f<5,2,c>, &f<5,3,c>, &f<5,4,c>, &f<5,5,c>, &f<5,-1,c>}, \ - CONJ_ARR(f) {&f<-1,0,c>, &f<-1,1,c>, &f<-1,2,c>, &f<-1,3,c>, &f<-1,4,c>, &f<-1,5,c>, &f<-1,-1,c>}}) \ - [std::min((int) numctrls, MAX_OPTIMISED_NUM_CTRLS + 1)] \ - [std::min((int) numtargs, MAX_OPTIMISED_NUM_TARGS + 1)] - -#define CONJ_ARR(f) vector)> - -#define GET_CPU_OR_GPU_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(funcsuffix, qureg, numctrls, numtargs, conj) \ - ((qureg.isGpuAccelerated)? 
\ - ((conj)? \ - GET_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true ) : \ - GET_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false ) ) : \ - ((conj)? \ - GET_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true ) : \ - GET_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false ) ) ) - - -/// @todo -/// This has gotten a bit ridiculous. Is there a way to use (likely) -/// more abominable pre-processor mischief which negates the need -/// to repeat the entire macro(s) when the number of templated -/// parameters grows? - - -#define GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(f, numctrls, numtargs, c, h) \ +#define GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(f, numctrls, numtargs, c, h) \ (vector { \ POWER_CONJ_ARR(f) {&f<0,0,c,h>, &f<0,1,c,h>, &f<0,2,c,h>, &f<0,3,c,h>, &f<0,4,c,h>, &f<0,5,c,h>, &f<0,-1,c,h>}, \ POWER_CONJ_ARR(f) {&f<1,0,c,h>, &f<1,1,c,h>, &f<1,2,c,h>, &f<1,3,c,h>, &f<1,4,c,h>, &f<1,5,c,h>, &f<1,-1,c,h>}, \ @@ -163,22 +138,25 @@ using std::min; #define POWER_CONJ_ARR(f) vector)> -#define GET_CPU_OR_GPU_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(funcsuffix, qureg, numctrls, numtargs, conj, haspower) \ +#define GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(funcsuffix, qureg, numctrls, numtargs, conj, haspower) \ ((qureg.isGpuAccelerated)? \ ((conj)? \ ((haspower)? 
\ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, true ) : \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, true ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \ ((haspower)? \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, true ) : \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, true ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) : \ ((conj)? \ ((haspower)? \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, true ) : \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, true ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \ ((haspower)? 
\ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, true ) : \ - GET_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) ) + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, true ) : \ + GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) ) + +/// @todo +/// The above macro spaghetti is diabolical - update using C++ metaprogramming! @@ -329,9 +307,9 @@ void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, } -void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj) { +void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp) { - auto func = GET_CPU_OR_GPU_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDenseMatr_sub, qureg, ctrls.size(), targs.size(), conj ); + auto func = GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDenseMatr_sub, qureg, ctrls.size(), targs.size(), conj, transp ); func(qureg, ctrls, ctrlStates, targs, matr); } @@ -360,7 +338,7 @@ void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, v bool hasPower = exponent != qcomp(1, 0); - auto func = GET_CPU_OR_GPU_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDiagMatr_sub, qureg, ctrls.size(), targs.size(), conj, hasPower ); + auto func = GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDiagMatr_sub, qureg, ctrls.size(), targs.size(), conj, hasPower ); func(qureg, ctrls, ctrlStates, targs, matr, exponent); } @@ -414,7 +392,30 @@ void accel_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qco } -void
accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly) { +auto getDenseMatrAllTargDiagMatrFunc(bool isGpu, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight) { + + // this helper function exists, dissimilar from the function-agnostic macros used + // by other functions, because densmatr_allTargDiagMatr_sub() does not accept every + // possible combination of its boolean template parameters + assert_fullStateDiagMatrTemplateParamsAreValid(multiplyLeft, multiplyRight, conjRight); + + bool hasPower = exponent != qcomp(1, 0); + + if (multiplyLeft && multiplyRight && conjRight) + return GET_CPU_OR_GPU_FOUR_BOOL_FUNC_OPTIMISED_FOR_FIRST_BOOL( isGpu, densmatr_allTargDiagMatr_sub, hasPower, true,true,true ); + + if (multiplyLeft && ! multiplyRight && ! conjRight) + return GET_CPU_OR_GPU_FOUR_BOOL_FUNC_OPTIMISED_FOR_FIRST_BOOL( isGpu, densmatr_allTargDiagMatr_sub, hasPower, true,false,false ); + + if (! multiplyLeft && multiplyRight && ! conjRight) + return GET_CPU_OR_GPU_FOUR_BOOL_FUNC_OPTIMISED_FOR_FIRST_BOOL( isGpu, densmatr_allTargDiagMatr_sub, hasPower, false,true,false ); + + // unreachable + return (void (*)(Qureg, FullStateDiagMatr, qcomp)) nullptr; +} + + +void accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight) { // matr is always local, qureg can be local or distributed... 
assert_fullStateDiagMatrIsLocal(matr); @@ -423,9 +424,9 @@ void accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qc bool quregGPU = qureg.isGpuAccelerated; bool matrGPU = matr.isGpuAccelerated; - bool hasPower = exponent != qcomp(1, 0); - auto cpuFunc = GET_FUNC_OPTIMISED_FOR_TWO_BOOLS( cpu_densmatr_allTargDiagMatr_sub, hasPower, multiplyOnly ); - auto gpuFunc = GET_FUNC_OPTIMISED_FOR_TWO_BOOLS( gpu_densmatr_allTargDiagMatr_sub, hasPower, multiplyOnly ); + // which determines which function is called + auto gpuFunc = getDenseMatrAllTargDiagMatrFunc(true, exponent, multiplyLeft, multiplyRight, conjRight); + auto cpuFunc = getDenseMatrAllTargDiagMatrFunc(false, exponent, multiplyLeft, multiplyRight, conjRight); // when deployments match, we trivially call the common backend if ( quregGPU && matrGPU) gpuFunc(qureg, matr, exponent); @@ -475,7 +476,7 @@ void accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qc } -void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly) { +void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight) { assert_fullStateDiagMatrIsDistributed(matr); assert_acceleratorQuregIsDistributed(qureg); @@ -500,7 +501,7 @@ void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qc temp.cpuElems = qureg.cpuCommBuffer; temp.gpuElems = qureg.gpuCommBuffer; - accel_densmatr_allTargDiagMatr_subA(qureg, temp, exponent, multiplyOnly); + accel_densmatr_allTargDiagMatr_subA(qureg, temp, exponent, multiplyLeft, multiplyRight, conjRight); } diff --git a/quest/src/core/accelerator.hpp b/quest/src/core/accelerator.hpp index 01cf3efcd..5480d8133 100644 --- a/quest/src/core/accelerator.hpp +++ b/quest/src/core/accelerator.hpp @@ -101,22 +101,22 @@ using std::vector; template returntype funcname <-1,numtargs, conj> args; -#define 
INSTANTIATE_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(returntype, funcname, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, true, true, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, true, false, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, false, true, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, false, false, args) - -#define private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 0, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 1, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 2, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 3, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 4, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 5, conj, haspower, args) \ - private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname,-1, conj, haspower, args) - -#define private_EXPONENTIABLE_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, numtargs, conj, haspower, args) \ +#define INSTANTIATE_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(returntype, funcname, args) \ + private_TWO_BOOL_INSTANTIATE_outer(returntype, funcname, true, true, args) \ + private_TWO_BOOL_INSTANTIATE_outer(returntype, funcname, true, false, args) \ + private_TWO_BOOL_INSTANTIATE_outer(returntype, funcname, false, true, args) \ + private_TWO_BOOL_INSTANTIATE_outer(returntype, funcname, false, false, args) + +#define private_TWO_BOOL_INSTANTIATE_outer(returntype, funcname, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 0, 
conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 1, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 2, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 3, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 4, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, 5, conj, haspower, args) \ + private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname,-1, conj, haspower, args) + +#define private_TWO_BOOL_INSTANTIATE_inner(returntype, funcname, numtargs, conj, haspower, args) \ template returntype funcname <0, numtargs, conj, haspower> args; \ template returntype funcname <1, numtargs, conj, haspower> args; \ template returntype funcname <2, numtargs, conj, haspower> args; \ @@ -194,7 +194,7 @@ void accel_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qureg, vector ctrls, void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr); -void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj); +void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp); /* @@ -209,8 +209,8 @@ void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, v void accel_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); -void accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly); -void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly); +void accel_densmatr_allTargDiagMatr_subA(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight); +void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, 
FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight); diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index 2f44127c8..a24516891 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ -431,6 +431,18 @@ void assert_fullStateDiagMatrIsDistributed(FullStateDiagMatr matr) { raiseInternalError("An accelerator function received a non-distributed FullStateDiagMatr where a distributed one was expected."); } +void assert_fullStateDiagMatrTemplateParamsAreValid(bool multiplyLeft, bool multiplyRight, bool conjRight) { + + bool valid = ( + ( multiplyLeft && multiplyRight && conjRight) || // matr qureg conj(matr) + ( multiplyLeft && ! multiplyRight && ! conjRight) || // matr qureg + (! multiplyLeft && multiplyRight && ! conjRight) // qureg matr + ); + + if (!valid) + raiseInternalError("The accelerator function accel_densmatr_allTargDiagMatr_subA() received an invalid combination of template parameters."); +} + void assert_acceleratorQuregIsDistributed(Qureg qureg) { if (!qureg.isDistributed) @@ -603,6 +615,11 @@ void error_gpuDeadCopyMatrixFunctionCalled() { raiseInternalError("The internal GPU function copyMatrixIfGpuCompiled() was called, though is intended as dead-code - matrices needing copying to GPU should be stored as flat row-wise lists."); } +void error_gpuDenseMatrixConjugatedAndTransposed() { + + raiseInternalError("The GPU + cuQuantum implementation of anyCtrlAnyTargDenseMatr() assumes that at most one of template arguments ApplyConj and ApplyTransp is true, though this was violated."); +} + void assert_quregIsGpuAccelerated(Qureg qureg) { if (!qureg.isGpuAccelerated) diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp index ce8f7e68c..7f6a0c609 100644 --- a/quest/src/core/errors.hpp +++ b/quest/src/core/errors.hpp @@ -153,6 +153,8 @@ void assert_fullStateDiagMatrIsLocal(FullStateDiagMatr matr); void assert_fullStateDiagMatrIsDistributed(FullStateDiagMatr matr); +void
assert_fullStateDiagMatrTemplateParamsAreValid(bool multiplyLeft, bool multiplyRight, bool conjRight); + void assert_acceleratorQuregIsDistributed(Qureg qureg); void assert_quregAndFullStateDiagMatrAreBothOrNeitherDistrib(Qureg qureg, FullStateDiagMatr matr); @@ -227,6 +229,8 @@ void error_gpuUnexpectedlyInaccessible(); void error_gpuDeadCopyMatrixFunctionCalled(); +void error_gpuDenseMatrixConjugatedAndTransposed(); + void assert_gpuIsAccessible(); void assert_gpuHasBeenBound(bool isBound); diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp index cdaef4c24..71cb8f211 100644 --- a/quest/src/core/localiser.cpp +++ b/quest/src/core/localiser.cpp @@ -953,7 +953,7 @@ void anyCtrlOneTargDenseMatrOnPrefix(Qureg qureg, vector ctrls, vector } -void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr, bool conj) { +void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr, bool conj, bool transp) { assertValidCtrlStates(ctrls, ctrlStates); setDefaultCtrlStates(ctrls, ctrlStates); @@ -964,8 +964,11 @@ void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, // retain only suffix control qubits as relevant to communication and local amp modification removePrefixQubitsAndStates(qureg, ctrls, ctrlStates); + // only one of conj or transp will be true (but logic is correct if both were true) if (conj) matr = util_getConj(matr); + if (transp) + matr = util_getTranspose(matr); // perform embarrassingly parallel routine or communication-inducing swaps doesGateRequireComm(qureg, targ)? 
@@ -983,18 +986,21 @@ void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, */ -void anyCtrlTwoOrAnyTargDenseMatrOnSuffix(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr2 matr, bool conj) { - if (conj) matr = util_getConj(matr); +void anyCtrlTwoOrAnyTargDenseMatrOnSuffix(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr2 matr, bool conj, bool transp) { + if (conj) + matr = util_getConj(matr); + if (transp) + matr = util_getTranspose(matr); accel_statevec_anyCtrlTwoTargDenseMatr_sub(qureg, ctrls, ctrlStates, targs[0], targs[1], matr); } -void anyCtrlTwoOrAnyTargDenseMatrOnSuffix(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj) { - accel_statevec_anyCtrlAnyTargDenseMatr_sub(qureg, ctrls, ctrlStates, targs, matr, conj); +void anyCtrlTwoOrAnyTargDenseMatrOnSuffix(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp) { + accel_statevec_anyCtrlAnyTargDenseMatr_sub(qureg, ctrls, ctrlStates, targs, matr, conj, transp); } // T can be CompMatr2 or CompMatr template -void anyCtrlTwoOrAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, T matr, bool conj) { +void anyCtrlTwoOrAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, T matr, bool conj, bool transp) { // node has nothing to do if all local amps violate control condition if (!doAnyLocalStatesHaveQubitValues(qureg, ctrls, ctrlStates)) @@ -1005,7 +1011,7 @@ void anyCtrlTwoOrAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ct // using only the suffix ctrls removePrefixQubitsAndStates(qureg, ctrls, ctrlStates); - anyCtrlTwoOrAnyTargDenseMatrOnSuffix(qureg, ctrls, ctrlStates, targs, matr, conj); + anyCtrlTwoOrAnyTargDenseMatrOnSuffix(qureg, ctrls, ctrlStates, targs, matr, conj, transp); return; } @@ -1032,7 +1038,7 @@ void anyCtrlTwoOrAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ct // perform embarrassingly parallel simulation 
using only the new suffix ctrls removePrefixQubitsAndStates(qureg, newCtrls, ctrlStates); - anyCtrlTwoOrAnyTargDenseMatrOnSuffix(qureg, newCtrls, ctrlStates, newTargs, matr, conj); + anyCtrlTwoOrAnyTargDenseMatrOnSuffix(qureg, newCtrls, ctrlStates, newTargs, matr, conj, transp); } // undo swaps, again invoking communication @@ -1040,15 +1046,15 @@ void anyCtrlTwoOrAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ct } -void localiser_statevec_anyCtrlTwoTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr, bool conj) { +void localiser_statevec_anyCtrlTwoTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr, bool conj, bool transp) { assertValidCtrlStates(ctrls, ctrlStates); setDefaultCtrlStates(ctrls, ctrlStates); - anyCtrlTwoOrAnyTargDenseMatr(qureg, ctrls, ctrlStates, {targ1,targ2}, matr, conj); + anyCtrlTwoOrAnyTargDenseMatr(qureg, ctrls, ctrlStates, {targ1,targ2}, matr, conj, transp); } -void localiser_statevec_anyCtrlAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj) { +void localiser_statevec_anyCtrlAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp) { assertValidCtrlStates(ctrls, ctrlStates); setDefaultCtrlStates(ctrls, ctrlStates); @@ -1060,19 +1066,19 @@ void localiser_statevec_anyCtrlAnyTargDenseMatr(Qureg qureg, vector ctrls, // its convenient generality, so we divert to the one-targ routine when possible, copying the // heap CPU matrix (assumed consistent with GPU memory) into stack memory if (targs.size() == 1) - localiser_statevec_anyCtrlOneTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], getCompMatr1(matr.cpuElems), conj); + localiser_statevec_anyCtrlOneTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], getCompMatr1(matr.cpuElems), conj, transp); // similarly, bespoke two-targ routines are preferable although they offer no communication // benefit 
because they call the same any-targ localiser, but still accelerate GPU memory access. // this function call is the same as below, but we explicitly pass a CompMatr2 type in lieu of // CompMatr, which avoids having to copy the CompMatr dynamic memory into accelerator backends else if (targs.size() == 2) - localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], targs[1], getCompMatr2(matr.cpuElems), conj); + localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], targs[1], getCompMatr2(matr.cpuElems), conj, transp); // call the any-targ routine when given 3 or more targs, which may still invoke bespoke, // fixed-targ instances of backend templated functions depending the number of targs else - anyCtrlTwoOrAnyTargDenseMatr(qureg, ctrls, ctrlStates, targs, matr, conj); + anyCtrlTwoOrAnyTargDenseMatr(qureg, ctrls, ctrlStates, targs, matr, conj, transp); } @@ -1165,7 +1171,7 @@ void localiser_statevec_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qco } -void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly) { +void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight) { assert_localiserGivenDensMatr(qureg); // the diagonal matr has quadratically fewer elements than the density-matrix @@ -1191,7 +1197,7 @@ void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qco // when the matrix is not distributed, we call the same routine despite whether qureg // is distributed or not; that merely changes how many qureg columns get updated if (!matrDist) { - accel_densmatr_allTargDiagMatr_subA(qureg, matr, exponent, multiplyOnly); + accel_densmatr_allTargDiagMatr_subA(qureg, matr, exponent, multiplyLeft, multiplyRight, conjRight); return; } @@ -1200,7 +1206,7 @@ void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qco // matr elems are inside 
qureg buffer, but we still pass matr struct along to // accelerator, because it is going to perform mischief to re-use subA(). - accel_densmatr_allTargDiagMatr_subB(qureg, matr, exponent, multiplyOnly); + accel_densmatr_allTargDiagMatr_subB(qureg, matr, exponent, multiplyLeft, multiplyRight, conjRight); } @@ -1215,12 +1221,17 @@ void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qco template void localiser_statevec_anyCtrlAnyTargAnyMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, T matr, bool conj) { - if constexpr (util_isDiagMatr ()) localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrls, ctrlStates, targs, matr, 1, conj); // exponent=1 - if constexpr (util_isDiagMatr1()) localiser_statevec_anyCtrlOneTargDiagMatr(qureg, ctrls, ctrlStates, targs[0], matr, conj); + + // this function is never invoked by operations which require transposing matr + bool transp = false; + qcomp expo = 1; + + if constexpr (util_isDiagMatr ()) localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrls, ctrlStates, targs, matr, expo, conj); + if constexpr (util_isDiagMatr1()) localiser_statevec_anyCtrlOneTargDiagMatr(qureg, ctrls, ctrlStates, targs[0], matr, conj); if constexpr (util_isDiagMatr2()) localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, ctrls, ctrlStates, targs[0], targs[1], matr, conj); - if constexpr (util_isCompMatr ()) localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, ctrls, ctrlStates, targs, matr, conj); - if constexpr (util_isCompMatr1()) localiser_statevec_anyCtrlOneTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], matr, conj); - if constexpr (util_isCompMatr2()) localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], targs[1], matr, conj); + if constexpr (util_isCompMatr ()) localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, ctrls, ctrlStates, targs, matr, conj, transp); + if constexpr (util_isCompMatr1()) localiser_statevec_anyCtrlOneTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], matr, conj, transp); + if
constexpr (util_isCompMatr2()) localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, ctrls, ctrlStates, targs[0], targs[1], matr, conj, transp); } template void localiser_statevec_anyCtrlAnyTargAnyMatr(Qureg, vector, vector, vector, DiagMatr, bool); @@ -1670,12 +1681,13 @@ CompMatr getSpoofedCompMatrFromSuperOp(SuperOp op) { void localiser_densmatr_superoperator(Qureg qureg, SuperOp op, vector ketTargs) { assert_localiserGivenDensMatr(qureg); - // effect the superoperator as a (non-conjugated) dense matrix on the ket + bra qubits + // effect the superoperator as a dense matrix on the ket + bra qubits bool conj = false; + bool transp = false; auto braTargs = util_getBraQubits(ketTargs, qureg); auto allTargs = util_getConcatenated(ketTargs, braTargs); CompMatr matr = getSpoofedCompMatrFromSuperOp(op); - localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, allTargs, matr, conj); + localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, allTargs, matr, conj, transp); } diff --git a/quest/src/core/localiser.hpp b/quest/src/core/localiser.hpp index 8b9975aca..50413fe68 100644 --- a/quest/src/core/localiser.hpp +++ b/quest/src/core/localiser.hpp @@ -83,11 +83,11 @@ void localiser_statevec_anyCtrlSwap(Qureg qureg, vector ctrls, vector * DENSE MATRICES */ -void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr, bool conj); +void localiser_statevec_anyCtrlOneTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr, bool conj, bool transp); -void localiser_statevec_anyCtrlTwoTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr, bool conj); +void localiser_statevec_anyCtrlTwoTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr, bool conj, bool transp); -void localiser_statevec_anyCtrlAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj); +void 
localiser_statevec_anyCtrlAnyTargDenseMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp); /* @@ -101,7 +101,7 @@ void localiser_statevec_anyCtrlTwoTargDiagMatr(Qureg qureg, vector ctrls, v void localiser_statevec_anyCtrlAnyTargDiagMatr(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, DiagMatr matr, qcomp exponent, bool conj); void localiser_statevec_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); -void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyOnly); +void localiser_densmatr_allTargDiagMatr(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool multiplyLeft, bool multiplyRight, bool conjRight); /* diff --git a/quest/src/core/utilities.cpp b/quest/src/core/utilities.cpp index 1c22d43d8..16891f234 100644 --- a/quest/src/core/utilities.cpp +++ b/quest/src/core/utilities.cpp @@ -409,6 +409,34 @@ void util_setConj(DiagMatr matrix) { } +/* + * MATRIX TRANSPOSITION + */ + +// type T can be qcomp*[2] or qcomp*[4] +template +void setDenseElemsTranspose(T elems, qindex dim) { + for (qindex i=0; i +template void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr) { assert_numCtrlsMatchesNumCtrlStatesAndTemplateParam(ctrls.size(), ctrlStates.size(), NumCtrls); @@ -548,8 +548,13 @@ void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, ve // loop may be unrolled for (qindex j=0; j ctrls, ve } -INSTANTIATE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, cpu_statevec_anyCtrlAnyTargDenseMatr_sub, (Qureg, vector, vector, vector, CompMatr) ) +INSTANTIATE_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, cpu_statevec_anyCtrlAnyTargDenseMatr_sub, (Qureg, vector, vector, vector, CompMatr) ) @@ -701,7 +706,14 @@ void cpu_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, vec } 
-INSTANTIATE_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, cpu_statevec_anyCtrlAnyTargDiagMatr_sub, (Qureg, vector, vector, vector, DiagMatr, qcomp) ) +INSTANTIATE_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, cpu_statevec_anyCtrlAnyTargDiagMatr_sub, (Qureg, vector, vector, vector, DiagMatr, qcomp) ) + + +/// @todo +/// there is currently no density matrix version of anyCtrlAnyTargDiagMatr_sub(); +/// instead, operations.cpp invokes the statevector version twice as it does for +/// dense matrices. This re-enumeration of the state however can be avoided since +/// the matrix is diagonal, as done below for cpu_densmatr_allTargDiagMatr_sub() @@ -737,28 +749,46 @@ void cpu_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp } -template +template void cpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent) { + // unlike other functions, this function handles all scenarios of... + // - matr -> matr qureg conj(matr) + // - matr -> matr qureg + // - matr -> qureg matr + // and all of the above where matr is raised to a power. This is an + // optimisation permitted by diagonality of matr, avoiding superfluous + // re-enumeration of the state otherwise invoked by operations.cpp + assert_exponentMatchesTemplateParam(exponent, HasPower); - // every iteration modifies one qureg amp, using one matr element + // every iteration modifies one qureg amp, using one or two matr elements qindex numIts = qureg.numAmpsPerNode; #pragma omp parallel for if(qureg.isMultithreaded||matr.isMultithreaded) for (qindex n=0; n (matr * rho) or (matr^exponent * rho) + if constexpr (MultiplyLeft) { - // and whether we should also right-apply matr to qureg - if constexpr (!MultiplyOnly) { + // i = global row of nth local amp + qindex i = fast_getQuregGlobalRowFromFlatIndex(n, matr.numElems); + qcomp term = matr.cpuElems[i]; + + // compile-time decide if applying power to avoid in-loop branching... 
+ // (beware that complex pow() is numerically unstable as detailed below) + if constexpr (HasPower) + term = std::pow(term, exponent); + + fac = term; + } + + // update fac to additionally include rho -> (rho * matr) or + // (rho * conj(matr)), or the same exponentiated + if constexpr (MultiplyRight) { // m = global index corresponding to n qindex m = concatenateBits(qureg.rank, n, qureg.logNumAmpsPerNode); @@ -767,16 +797,18 @@ void cpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp qindex j = fast_getQuregGlobalColFromFlatIndex(m, matr.numElems); qcomp term = matr.cpuElems[j]; - // right-apply matrix elem may also need to be exponentiated. // beware that pow(qcomp,qcomp) below gives notable error over pow(qreal,qreal) // (by producing an unexpected non-zero imaginary component) when the base is real // and negative, and the exponent is an integer. We tolerate this heightened error // because we have no reason to think matr is real (it's not constrained Hermitian).
- if constexpr(HasPower) + if constexpr (HasPower) term = std::pow(term, exponent); - // conj after pow - fac *= std::conj(term); + // conj strictly after pow, to effect conj(matr^exponent) + if constexpr (ConjRight) + term = std::conj(term); + + fac *= term; } qureg.cpuAmps[n] *= fac; @@ -787,10 +819,12 @@ void cpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp template void cpu_statevec_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); template void cpu_statevec_allTargDiagMatr_sub(Qureg, FullStateDiagMatr, qcomp); -template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); -template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); -template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); -template void cpu_densmatr_allTargDiagMatr_sub(Qureg, FullStateDiagMatr, qcomp); +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr qureg conj(matr) +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr qureg +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // qureg matr +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr^P qureg conj(matr^P) +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr^P qureg +template void cpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // qureg matr^P diff --git a/quest/src/cpu/cpu_subroutines.hpp b/quest/src/cpu/cpu_subroutines.hpp index 661de150a..ea570ba2b 100644 --- a/quest/src/cpu/cpu_subroutines.hpp +++ b/quest/src/cpu/cpu_subroutines.hpp @@ -67,7 +67,7 @@ template void cpu_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qur template void cpu_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr); -template void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, 
vector ctrlStates, vector targs, CompMatr matr); +template void cpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr); /* @@ -82,7 +82,7 @@ template void cpu_st template void cpu_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); -template void cpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); +template void cpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); /* diff --git a/quest/src/gpu/gpu_cuquantum.cuh b/quest/src/gpu/gpu_cuquantum.cuh index 3b1c55fdb..13afaae87 100644 --- a/quest/src/gpu/gpu_cuquantum.cuh +++ b/quest/src/gpu/gpu_cuquantum.cuh @@ -197,14 +197,11 @@ void cuquantum_statevec_anyCtrlSwap_subA(Qureg qureg, vector ctrls, vector< */ -void cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, cu_qcomp* flatMatrElems) { +void cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, cu_qcomp* flatMatrElems, bool applyAdj) { // this funciton is called 'subA' instead of just 'sub', because it is also called in // the one-target case whereby it is strictly the embarrassingly parallel _subA scenario - // do not adjoint matrix - int adj = 0; - // use automatic workspace management void* work = nullptr; size_t workSize = 0; @@ -212,7 +209,7 @@ void cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(Qureg qureg, vector CUDA_CHECK( custatevecApplyMatrix( config.handle, toCuQcomps(qureg.gpuAmps), CUQUANTUM_QCOMP, qureg.logNumAmpsPerNode, - flatMatrElems, CUQUANTUM_QCOMP, CUSTATEVEC_MATRIX_LAYOUT_ROW, adj, + flatMatrElems, CUQUANTUM_QCOMP, CUSTATEVEC_MATRIX_LAYOUT_ROW, applyAdj, targs.data(), targs.size(), ctrls.data(), ctrlStates.data(), ctrls.size(), CUSTATEVEC_COMPUTE_DEFAULT, diff --git a/quest/src/gpu/gpu_kernels.cuh b/quest/src/gpu/gpu_kernels.cuh index 4cf860bee..4fdd7ceea 100644 --- 
a/quest/src/gpu/gpu_kernels.cuh +++ b/quest/src/gpu/gpu_kernels.cuh @@ -293,7 +293,7 @@ __forceinline__ __device__ qindex getThreadsNthGlobalArrInd(qindex n, qindex thr } -template +template __global__ void kernel_statevec_anyCtrlFewTargDenseMatr( cu_qcomp* amps, qindex numThreads, int* ctrlsAndTargs, int numCtrls, qindex ctrlsAndTargsMask, int* targs, @@ -341,8 +341,12 @@ __global__ void kernel_statevec_anyCtrlFewTargDenseMatr( #pragma unroll for (qindex l=0; l +template __global__ void kernel_statevec_anyCtrlManyTargDenseMatr( cu_qcomp* globalCache, cu_qcomp* amps, qindex numThreads, qindex numBatchesPerThread, @@ -398,10 +402,16 @@ __global__ void kernel_statevec_anyCtrlManyTargDenseMatr( for (qindex l=0; l +template __global__ void kernel_densmatr_allTargDiagMatr_sub( cu_qcomp* amps, qindex numThreads, int rank, qindex logNumAmpsPerNode, cu_qcomp* elems, qindex numElems, cu_qcomp exponent ) { GET_THREAD_IND(n, numThreads); - // i = global row of nth local index - qindex i = n % numElems; - cu_qcomp fac = elems[i]; + cu_qcomp fac = getCuQcomp(1, 0); - if constexpr (HasPower) - fac = getCompPower(fac, exponent); + if constexpr (MultiplyLeft) { - if constexpr (!MultiplyOnly) { + qindex i = fast_getQuregGlobalRowFromFlatIndex(n, numElems); + cu_qcomp term = elems[i]; - // m = global index corresponding to n - qindex m = concatenateBits(rank, n, logNumAmpsPerNode); + if constexpr (HasPower) + term = getCompPower(term, exponent); - // j = global column corresponding to n - qindex j = m / numElems; + fac = term; + } + + if constexpr (MultiplyRight) { + + qindex m = concatenateBits(rank, n, logNumAmpsPerNode); + qindex j = fast_getQuregGlobalColFromFlatIndex(m, numElems); cu_qcomp term = elems[j]; - if constexpr(HasPower) + if constexpr (HasPower) term = getCompPower(term, exponent); - // conj after pow - term.y *= -1; + if constexpr (ConjRight) + term.y *= -1; + fac = fac * term; } diff --git a/quest/src/gpu/gpu_subroutines.cpp b/quest/src/gpu/gpu_subroutines.cpp 
index 0b0bc46a3..0e7bb9385 100644 --- a/quest/src/gpu/gpu_subroutines.cpp +++ b/quest/src/gpu/gpu_subroutines.cpp @@ -290,8 +290,9 @@ void gpu_statevec_anyCtrlOneTargDenseMatr_subA(Qureg qureg, vector ctrls, v #if COMPILE_CUQUANTUM + bool applyAdj = false; auto arr = unpackMatrixToCuQcomps(matr); - cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, {targ}, arr.data()); + cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, {targ}, arr.data(), applyAdj); #elif COMPILE_CUDA @@ -358,8 +359,9 @@ void gpu_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, ve #if COMPILE_CUQUANTUM + bool applyAdj = false; auto arr = unpackMatrixToCuQcomps(matr); - cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, {targ1, targ2}, arr.data()); + cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, {targ1, targ2}, arr.data(), applyAdj); #elif COMPILE_CUDA @@ -393,7 +395,7 @@ INSTANTIATE_FUNC_OPTIMISED_FOR_NUM_CTRLS( void, gpu_statevec_anyCtrlTwoTargDense */ -template +template void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr) { assert_numCtrlsMatchesNumCtrlStatesAndTemplateParam(ctrls.size(), ctrlStates.size(), NumCtrls); @@ -404,14 +406,21 @@ void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, ve auto matrElemsPtr = toCuQcomps(matr.gpuElemsFlat); auto matrElemsLen = matr.numRows * matr.numRows; - // conjugate every matrix element if necessary (cuStateVec cannot conj for us; only adjoint) - if (ApplyConj) + // assert the pre-condition assumed below + if (ApplyConj && ApplyTransp) + error_gpuDenseMatrixConjugatedAndTransposed(); + + // cuStateVec can effect the adjoint, but not the individual conjugate or transpose, + // and alas we only ever use one at a time (because applying matrix to the bra-qubits of + // a vectorised density matrix effectively transposes the matrix), so we effect 
transpose + // by manually conjugating then telling cuQuantum to adjoint (hehe!) + if (ApplyConj || ApplyTransp) thrust_setElemsToConjugate(matrElemsPtr, matrElemsLen); - cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, targs, matrElemsPtr); + cuquantum_statevec_anyCtrlAnyTargDenseMatrix_subA(qureg, ctrls, ctrlStates, targs, matrElemsPtr, ApplyTransp); - // undo conjugation (which is only not done if cuQuantum encounters a non-recoverable internal error) - if (ApplyConj) + // undo changes (which is only not done if cuQuantum encounters a non-recoverable internal error) + if (ApplyConj || ApplyTransp) thrust_setElemsToConjugate(matrElemsPtr, matrElemsLen); #elif COMPILE_CUDA @@ -454,10 +463,12 @@ void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, ve qindex numThreads = numBatches; qindex numBlocks = getNumBlocks(numThreads); - kernel_statevec_anyCtrlFewTargDenseMatr <<>> ( - ampsPtr, numThreads, - qubitsPtr, nCtrls, qubitStateMask, - targsPtr, matrPtr + kernel_statevec_anyCtrlFewTargDenseMatr + + <<>> ( + ampsPtr, numThreads, + qubitsPtr, nCtrls, qubitStateMask, + targsPtr, matrPtr ); } else { @@ -490,11 +501,13 @@ void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, ve qindex numKernelInvocations = numBlocks * NUM_THREADS_PER_BLOCK; qcomp* cache = gpu_getCacheOfSize(powerOf2(targs.size()), numKernelInvocations); - kernel_statevec_anyCtrlManyTargDenseMatr <<>> ( - toCuQcomps(cache), - ampsPtr, numThreads, numBatchesPerThread, - qubitsPtr, nCtrls, qubitStateMask, - targsPtr, targs.size(), powerOf2(targs.size()), matrPtr + kernel_statevec_anyCtrlManyTargDenseMatr + + <<>> ( + toCuQcomps(cache), + ampsPtr, numThreads, numBatchesPerThread, + qubitsPtr, nCtrls, qubitStateMask, + targsPtr, targs.size(), powerOf2(targs.size()), matrPtr ); } @@ -504,7 +517,7 @@ void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, ve } -INSTANTIATE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, 
gpu_statevec_anyCtrlAnyTargDenseMatr_sub, (Qureg, vector, vector, vector, CompMatr) ) +INSTANTIATE_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, gpu_statevec_anyCtrlAnyTargDenseMatr_sub, (Qureg, vector, vector, vector, CompMatr) ) @@ -709,7 +722,7 @@ void gpu_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, vec } -INSTANTIATE_EXPONENTIABLE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, gpu_statevec_anyCtrlAnyTargDiagMatr_sub, (Qureg, vector, vector, vector, DiagMatr, qcomp) ) +INSTANTIATE_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( void, gpu_statevec_anyCtrlAnyTargDiagMatr_sub, (Qureg, vector, vector, vector, DiagMatr, qcomp) ) @@ -736,23 +749,21 @@ void gpu_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp } -template +template void gpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent) { assert_exponentMatchesTemplateParam(exponent, HasPower); - // in theory, we could use cuQuantum when HasPower=MultiplyOnly=true, - // treating FullStateDiagMatr like an N/2-qubit DiagMatr upon a SV, - // but this scenario is not worth the code complication - #if COMPILE_CUDA || COMPILE_CUQUANTUM qindex numThreads = qureg.numAmpsPerNode; qindex numBlocks = getNumBlocks(numThreads); - kernel_densmatr_allTargDiagMatr_sub <<>> ( - toCuQcomps(qureg.gpuAmps), numThreads, qureg.rank, qureg.logNumAmpsPerNode, - toCuQcomps(util_getGpuMemPtr(matr)), matr.numElems, toCuQcomp(exponent) + kernel_densmatr_allTargDiagMatr_sub + + <<>> ( + toCuQcomps(qureg.gpuAmps), numThreads, qureg.rank, qureg.logNumAmpsPerNode, + toCuQcomps(util_getGpuMemPtr(matr)), matr.numElems, toCuQcomp(exponent) ); #else @@ -764,10 +775,12 @@ void gpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp template void gpu_statevec_allTargDiagMatr_sub(Qureg, FullStateDiagMatr, qcomp); template void gpu_statevec_allTargDiagMatr_sub(Qureg, FullStateDiagMatr, qcomp); -template void gpu_densmatr_allTargDiagMatr_sub 
(Qureg, FullStateDiagMatr, qcomp); -template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); -template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); -template void gpu_densmatr_allTargDiagMatr_sub(Qureg, FullStateDiagMatr, qcomp); +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr qureg conj(matr) +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr qureg +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // qureg matr +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr^P qureg conj(matr^P) +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // matr^P qureg +template void gpu_densmatr_allTargDiagMatr_sub (Qureg, FullStateDiagMatr, qcomp); // qureg matr^P diff --git a/quest/src/gpu/gpu_subroutines.hpp b/quest/src/gpu/gpu_subroutines.hpp index 04fabe0a2..7ec3f6696 100644 --- a/quest/src/gpu/gpu_subroutines.hpp +++ b/quest/src/gpu/gpu_subroutines.hpp @@ -60,7 +60,7 @@ template void gpu_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qur template void gpu_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr); -template void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr); +template void gpu_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr); /* @@ -75,7 +75,7 @@ template void gpu_st template void gpu_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); -template void gpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); +template void gpu_densmatr_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent); /* diff --git a/tests/unit/operations.cpp b/tests/unit/operations.cpp 
index 041000c4f..b8522321a 100644 --- a/tests/unit/operations.cpp +++ b/tests/unit/operations.cpp @@ -581,6 +581,25 @@ qmatrix getReferenceMatrix(auto matrixRefGen, vector targs, auto additional } +/* + * Template parameters which specify how the reference + * operator should be applied upon the reference state. + * Let |psi> be a statevector, rho be a density matrix, + * and matr be an operator matrix. The options perform: + * + * apply: |psi> -> matr |psi>, rho -> matr rho adj(matr) + * multiply: |psi> -> matr |psi>, rho -> matr rho + * postmultiply: rho -> rho matr + * + * Note this is necessarily a template parameter (rather + * than just a runtime parameter) only because the + * postMultiplyReferenceOperator() function is defined + * only upon qmatrix (for density matrices) + */ + +enum ApplyFlag { apply, multiply, postmultiply }; + + /* * display all/only relevant inputs given to an * API operation when its subsequent test fails. @@ -681,8 +700,8 @@ void CAPTURE_RELEVANT( vector ctrls, vector states, vector targs, * matrixRefGen) is formatted. */ -template -void testOperationCorrectness(auto operation, auto matrixRefGen, bool multiplyOnly) { +template +void testOperationCorrectness(auto operation, auto matrixRefGen) { PREPARE_TEST( numQubits, statevecQuregs, densmatrQuregs, statevecRef, densmatrRef ); @@ -724,18 +743,25 @@ void testOperationCorrectness(auto operation, auto matrixRefGen, bool multiplyOn auto allArgs = std::tuple_cat(tuple{operation, qureg}, primaryArgs, furtherArgs); std::apply(apiFunc, allArgs); - // update reference state - (multiplyOnly)?
- multiplyReferenceOperator(stateRef, ctrls, states, targs, matrixRef): - applyReferenceOperator(stateRef, ctrls, states, targs, matrixRef); + // update reference state (ctrls & states happen to only ever be used by apply) + if constexpr (Apply == apply) applyReferenceOperator( stateRef, ctrls, states, targs, matrixRef); + if constexpr (Apply == multiply) multiplyReferenceOperator( stateRef, ctrls, states, targs, matrixRef); + if constexpr (Apply == postmultiply) postMultiplyReferenceOperator(stateRef, ctrls, states, targs, matrixRef); }; // report operation's input parameters if any subsequent test fails CAPTURE_RELEVANT( ctrls, states, targs, furtherArgs ); - // test API operation on all available deployment combinations (e.g. OMP, MPI, MPI+GPU, etc) - SECTION( LABEL_STATEVEC ) { TEST_ON_CACHED_QUREGS(statevecQuregs, statevecRef, testFunc); } - SECTION( LABEL_DENSMATR ) { TEST_ON_CACHED_QUREGS(densmatrQuregs, densmatrRef, testFunc); } + // test API operation on all available deployment combinations (e.g. 
OMP, MPI, MPI+GPU, etc), + // though the postMultiply*() functions do not accept statevectors + if constexpr (Apply != postmultiply) { + SECTION( LABEL_STATEVEC ) { + TEST_ON_CACHED_QUREGS(statevecQuregs, statevecRef, testFunc); + } + } + SECTION( LABEL_DENSMATR ) { + TEST_ON_CACHED_QUREGS(densmatrQuregs, densmatrRef, testFunc); + } // free any heap-alloated API matrices and restore epsilon freeRemainingArgs(furtherArgs); @@ -786,11 +812,11 @@ auto getFixedRemainingArgs(vector targs) { if constexpr (Args == pauligad) return tuple{ getPauliStr("XXX", targs), 0 }; // (XXX, angle) } -template -void testOperationValidation(auto operation, bool multiplyOnly) { +template +void testOperationValidation(auto operation) { - // use any cached Qureg - Qureg qureg = getCachedStatevecs().begin()->second; + // use any cached Qureg (though postMultiply*() functions accept only density matrices) + Qureg qureg = getCachedDensmatrs().begin()->second; // in lieu of preparing random inputs like testOperationCorrectness() // above, we instead obtain simple, fixed, compatible inputs @@ -963,7 +989,7 @@ void testOperationValidation(auto operation, bool multiplyOnly) { return; // which enforce unitarity - if (multiplyOnly) + if (Apply != apply) return; if constexpr (Args == compmatr || Args == diagmatr) @@ -1005,6 +1031,9 @@ void testOperationValidation(auto operation, bool multiplyOnly) { SECTION( "targeted amps fit in node" ) { + // simplest to trigger validation using a statevector + qureg = getCachedStatevecs().begin()->second; + // can only be validated when environment AND qureg // are distributed (over more than 1 node, of course) if (qureg.numNodes < 2) @@ -1050,7 +1079,7 @@ void testOperationValidation(auto operation, bool multiplyOnly) { SECTION( "non-unitary exponent" ) { // not relevant for functions which do not assert unitarity - if (multiplyOnly) + if (Apply != apply) return; if constexpr (Args == diagpower) @@ -1065,7 +1094,7 @@ void testOperationValidation(auto operation, 
bool multiplyOnly) { // when being applied as a unitary, abs(elem)=1 so there's no // possibility of divergence (we'd merely trigger isUnitary) - if (!multiplyOnly) + if (Apply == apply) return; if constexpr (Args == diagpower) @@ -1086,6 +1115,18 @@ void testOperationValidation(auto operation, bool multiplyOnly) { REQUIRE_THROWS_WITH( apiFunc(), ContainsSubstring("zero vector") ); } + SECTION( "qureg type" ) { + + // only postMultiply*() functions discriminate Qureg + if (Apply != postmultiply) + return; + + // use any statevector + qureg = getCachedStatevecs().begin()->second; + + REQUIRE_THROWS_WITH( apiFunc(), ContainsSubstring("Expected a density matrix") ); + } + freeRemainingArgs(furtherArgs); } @@ -1095,27 +1136,20 @@ void testOperationValidation(auto operation, bool multiplyOnly) { * inputs as indicated by the template flags */ -template -void testOperation(auto operation, auto matrixRefGen, bool multiplyOnly) { +template +void testOperation(auto operation, auto matrixRefGen) { assertNumQubitsFlagsAreValid(Ctrls, Targs); SECTION( LABEL_CORRECTNESS ) { - testOperationCorrectness(operation, matrixRefGen, multiplyOnly); + testOperationCorrectness(operation, matrixRefGen); } SECTION( LABEL_VALIDATION ) { - testOperationValidation(operation, multiplyOnly); + testOperationValidation(operation); } } -template -void testOperation(auto operation, auto matrixRefGen) { - - bool multiplyOnly = false; - testOperation(operation, matrixRefGen, multiplyOnly); -} - /* * perform unit tests for the four distinctly-controlled @@ -1234,13 +1268,13 @@ void testOperation(auto operation, auto matrixRefGen) { // defines a Catch2 test-case for the implied function #define TEST_CASE_OPERATION( namesuffix, numctrls, numtargs, argtype, matrixgen ) \ TEST_CASE( GET_FUNC_NAME_STR(numctrls, namesuffix), TEST_CATEGORY ) { \ - testOperation( \ + testOperation( \ GET_CASTED_FUNC(namesuffix, numctrls, numtargs, argtype), \ matrixgen); \ } -// automate the testing of a function for all its 
controlled variants -#define TEST_ALL_CTRL_OPERATIONS( namesuffix, numtargs, argtype, matrixgen ) \ +// automate the testing of an apply*() function for all its controlled variants +#define TEST_ALL_CTRL_OPERATIONS( namesuffix, numtargs, argtype, matrixgen ) \ TEST_CASE_OPERATION( namesuffix, zero, numtargs, argtype, matrixgen ) \ TEST_CASE_OPERATION( namesuffix, one, numtargs, argtype, matrixgen ) \ TEST_CASE_OPERATION( namesuffix, any, numtargs, argtype, matrixgen ) \ @@ -1289,61 +1323,105 @@ TEST_ALL_CTRL_OPERATIONS( PhaseGadget, any, scalar, VariableSizeParameterisedMat * non-controlled operations with no C++ overloads */ -TEST_CASE( "multiplyPauliStr", TEST_CATEGORY ) { testOperation(multiplyPauliStr, nullptr, true); } -TEST_CASE( "multiplyPauliGadget", TEST_CATEGORY ) { testOperation(multiplyPauliGadget, nullptr, true); } -TEST_CASE( "multiplyCompMatr1", TEST_CATEGORY ) { testOperation(multiplyCompMatr1, nullptr, true); } -TEST_CASE( "multiplyCompMatr2", TEST_CATEGORY ) { testOperation(multiplyCompMatr2, nullptr, true); } -TEST_CASE( "multiplyDiagMatr1", TEST_CATEGORY ) { testOperation(multiplyDiagMatr1, nullptr, true); } -TEST_CASE( "multiplyDiagMatr2", TEST_CATEGORY ) { testOperation(multiplyDiagMatr2, nullptr, true); } -TEST_CASE( "applyPhaseFlip", TEST_CATEGORY ) { testOperation (applyPhaseFlip, VariableSizeMatrices::PF(1)); } -TEST_CASE( "applyTwoQubitPhaseFlip", TEST_CATEGORY ) { testOperation (applyTwoQubitPhaseFlip, VariableSizeMatrices::PF(2)); } -TEST_CASE( "applyPhaseShift", TEST_CATEGORY ) { testOperation(applyPhaseShift, ParameterisedMatrices::PS); } -TEST_CASE( "applyTwoQubitPhaseShift", TEST_CATEGORY ) { testOperation(applyTwoQubitPhaseShift, ParameterisedMatrices::PS2); } +TEST_CASE( "applyPhaseFlip", TEST_CATEGORY ) { testOperation (applyPhaseFlip, VariableSizeMatrices::PF(1)); } +TEST_CASE( "applyTwoQubitPhaseFlip", TEST_CATEGORY ) { testOperation (applyTwoQubitPhaseFlip, VariableSizeMatrices::PF(2)); } +TEST_CASE( "applyPhaseShift",
TEST_CATEGORY ) { testOperation(applyPhaseShift, ParameterisedMatrices::PS ); } +TEST_CASE( "applyTwoQubitPhaseShift", TEST_CATEGORY ) { testOperation(applyTwoQubitPhaseShift, ParameterisedMatrices::PS2 ); } + +TEST_CASE( "multiplySwap", TEST_CATEGORY ) { testOperation(multiplySwap, FixedMatrices::SWAP); } +TEST_CASE( "multiplyPauliX", TEST_CATEGORY ) { testOperation(multiplyPauliX, FixedMatrices::X); } +TEST_CASE( "multiplyPauliY", TEST_CATEGORY ) { testOperation(multiplyPauliY, FixedMatrices::Y); } +TEST_CASE( "multiplyPauliZ", TEST_CATEGORY ) { testOperation(multiplyPauliZ, FixedMatrices::Z); } +TEST_CASE( "multiplyPauliStr", TEST_CATEGORY ) { testOperation(multiplyPauliStr, nullptr); } +TEST_CASE( "multiplyPauliGadget", TEST_CATEGORY ) { testOperation(multiplyPauliGadget, nullptr); } +TEST_CASE( "multiplyCompMatr1", TEST_CATEGORY ) { testOperation(multiplyCompMatr1, nullptr); } +TEST_CASE( "multiplyCompMatr2", TEST_CATEGORY ) { testOperation(multiplyCompMatr2, nullptr); } +TEST_CASE( "multiplyDiagMatr1", TEST_CATEGORY ) { testOperation(multiplyDiagMatr1, nullptr); } +TEST_CASE( "multiplyDiagMatr2", TEST_CATEGORY ) { testOperation(multiplyDiagMatr2, nullptr); } + +TEST_CASE( "postMultiplySwap", TEST_CATEGORY ) { testOperation(postMultiplySwap, FixedMatrices::SWAP); } +TEST_CASE( "postMultiplyPauliX", TEST_CATEGORY ) { testOperation(postMultiplyPauliX, FixedMatrices::X); } +TEST_CASE( "postMultiplyPauliY", TEST_CATEGORY ) { testOperation(postMultiplyPauliY, FixedMatrices::Y); } +TEST_CASE( "postMultiplyPauliZ", TEST_CATEGORY ) { testOperation(postMultiplyPauliZ, FixedMatrices::Z); } +TEST_CASE( "postMultiplyPauliStr", TEST_CATEGORY ) { testOperation(postMultiplyPauliStr, nullptr); } +TEST_CASE( "postMultiplyPauliGadget", TEST_CATEGORY ) { testOperation(postMultiplyPauliGadget, nullptr); } +TEST_CASE( "postMultiplyCompMatr1", TEST_CATEGORY ) { testOperation(postMultiplyCompMatr1, nullptr); } +TEST_CASE( "postMultiplyCompMatr2", TEST_CATEGORY ) { 
testOperation(postMultiplyCompMatr2, nullptr); } +TEST_CASE( "postMultiplyDiagMatr1", TEST_CATEGORY ) { testOperation(postMultiplyDiagMatr1, nullptr); } +TEST_CASE( "postMultiplyDiagMatr2", TEST_CATEGORY ) { testOperation(postMultiplyDiagMatr2, nullptr); } /* * non-controlled operations which have a C++ overload * (because they accept qubit lists which become vector), * and so which require explicit casting to resolve the - * compiler ambiguity + * compiler ambiguity (spaghetti 4 lyf) */ +TEST_CASE( "applyMultiQubitPhaseFlip", TEST_CATEGORY ) { + auto func = static_cast(applyMultiQubitPhaseFlip); + testOperation(func, VariableSizeMatrices::PF); +} + +TEST_CASE( "applyMultiQubitPhaseShift", TEST_CATEGORY ) { + auto func = static_cast(applyMultiQubitPhaseShift); + testOperation(func, VariableSizeParameterisedMatrices::PS); +} + + TEST_CASE( "multiplyCompMatr", TEST_CATEGORY ) { auto func = static_cast(multiplyCompMatr); - testOperation(func, nullptr, true); + testOperation(func, nullptr); } TEST_CASE( "multiplyDiagMatr", TEST_CATEGORY ) { auto func = static_cast(multiplyDiagMatr); - testOperation(func, nullptr, true); + testOperation(func, nullptr); } TEST_CASE( "multiplyDiagMatrPower", TEST_CATEGORY ) { auto func = static_cast(multiplyDiagMatrPower); - testOperation(func, nullptr, true); + testOperation(func, nullptr); } TEST_CASE( "multiplyMultiQubitNot", TEST_CATEGORY ) { auto func = static_cast(multiplyMultiQubitNot); - testOperation(func, VariableSizeMatrices::X, true); + testOperation(func, VariableSizeMatrices::X); } TEST_CASE( "multiplyPhaseGadget", TEST_CATEGORY ) { auto func = static_cast(multiplyPhaseGadget); - testOperation(func, VariableSizeParameterisedMatrices::Z, true); + testOperation(func, VariableSizeParameterisedMatrices::Z); } -TEST_CASE( "applyMultiQubitPhaseFlip", TEST_CATEGORY ) { - auto func = static_cast(applyMultiQubitPhaseFlip); - testOperation(func, VariableSizeMatrices::PF); + +TEST_CASE( "postMultiplyCompMatr", TEST_CATEGORY ) { + auto 
func = static_cast(postMultiplyCompMatr); + testOperation(func, nullptr); } -TEST_CASE( "applyMultiQubitPhaseShift", TEST_CATEGORY ) { - auto func = static_cast(applyMultiQubitPhaseShift); - testOperation(func, VariableSizeParameterisedMatrices::PS); +TEST_CASE( "postMultiplyDiagMatr", TEST_CATEGORY ) { + auto func = static_cast(postMultiplyDiagMatr); + testOperation(func, nullptr); +} + +TEST_CASE( "postMultiplyDiagMatrPower", TEST_CATEGORY ) { + auto func = static_cast(postMultiplyDiagMatrPower); + testOperation(func, nullptr); +} + +TEST_CASE( "postMultiplyMultiQubitNot", TEST_CATEGORY ) { + auto func = static_cast(postMultiplyMultiQubitNot); + testOperation(func, VariableSizeMatrices::X); +} + +TEST_CASE( "postMultiplyPhaseGadget", TEST_CATEGORY ) { + auto func = static_cast(postMultiplyPhaseGadget); + testOperation(func, VariableSizeParameterisedMatrices::Z); } + /* * operations which need custom logic */ @@ -1804,6 +1882,65 @@ TEST_CASE( "multiplyFullStateDiagMatrPower", TEST_CATEGORY LABEL_MIXED_DEPLOY_TA } +TEST_CASE( "postMultiplyFullStateDiagMatr", TEST_CATEGORY LABEL_MIXED_DEPLOY_TAG ) { + + PREPARE_TEST( numQubits, cachedSV, cachedDM, refSV, refDM ); + + auto cachedMatrs = getCachedFullStateDiagMatrs(); + + SECTION( LABEL_CORRECTNESS ) { + + qmatrix refMatr = getRandomDiagonalMatrix(getPow2(numQubits)); + auto apiFunc = postMultiplyFullStateDiagMatr; + + GENERATE( range(0, getNumTestedMixedDeploymentRepetitions()) ); + + SECTION( LABEL_DENSMATR ) { + + auto refFunc = [&] (qmatrix& state, qmatrix matr) { postMultiplyReferenceOperator(state, matr); }; + + TEST_ON_CACHED_QUREG_AND_MATRIX( cachedDM, cachedMatrs, apiFunc, refDM, refMatr, refFunc); + } + } + + /// @todo input validation +} + + +TEST_CASE( "postMultiplyFullStateDiagMatrPower", TEST_CATEGORY LABEL_MIXED_DEPLOY_TAG ) { + + PREPARE_TEST( numQubits, cachedSV, cachedDM, refSV, refDM ); + + auto cachedMatrs = getCachedFullStateDiagMatrs(); + + SECTION( LABEL_CORRECTNESS ) { + + qmatrix refMatr = 
getRandomDiagonalMatrix(getPow2(numQubits)); + qcomp exponent = getRandomComplex(); + + auto apiFunc = [&](Qureg qureg, FullStateDiagMatr matr) { + return postMultiplyFullStateDiagMatrPower(qureg, matr, exponent); + }; + + CAPTURE( exponent ); + + GENERATE( range(0, getNumTestedMixedDeploymentRepetitions()) ); + + SECTION( LABEL_DENSMATR ) { + + auto refFunc = [&] (qmatrix& state, qmatrix matr) { + matr = getPowerOfDiagonalMatrix(matr, exponent); + postMultiplyReferenceOperator(state, matr); + }; + + TEST_ON_CACHED_QUREG_AND_MATRIX( cachedDM, cachedMatrs, apiFunc, refDM, refMatr, refFunc); + } + } + + /// @todo input validation +} + + TEST_CASE( "applyFullStateDiagMatr", TEST_CATEGORY LABEL_MIXED_DEPLOY_TAG ) { PREPARE_TEST( numQubits, cachedSV, cachedDM, refSV, refDM ); @@ -1921,6 +2058,35 @@ TEST_CASE( "multiplyPauliStrSum", TEST_CATEGORY LABEL_MIXED_DEPLOY_TAG ) { } +TEST_CASE( "postMultiplyPauliStrSum", TEST_CATEGORY LABEL_MIXED_DEPLOY_TAG ) { + + PREPARE_TEST( numQubits, statevecQuregs, densmatrQuregs, statevecRef, densmatrRef ); + + SECTION( LABEL_CORRECTNESS ) { + + int numQubits = getNumCachedQubits(); + int numTerms = GENERATE_COPY( 1, 2, 10 ); + + PauliStrSum sum = createRandomPauliStrSum(numQubits, numTerms); + + auto testFunc = [&](Qureg qureg, auto& ref) { + + // must use (and ergo make) an identically-deployed workspace + Qureg workspace = createCloneQureg(qureg); + postMultiplyPauliStrSum(qureg, sum, workspace); + destroyQureg(workspace); + + ref = ref * getMatrix(sum, numQubits); + }; + + CAPTURE( numTerms ); + SECTION( LABEL_DENSMATR ) { TEST_ON_CACHED_QUREGS(densmatrQuregs, densmatrRef, testFunc); } + } + + /// @todo input validation +} + + TEST_CASE( "applyNonUnitaryPauliGadget", TEST_CATEGORY ) { PREPARE_TEST( numQubits, statevecQuregs, densmatrQuregs, statevecRef, densmatrRef ); diff --git a/tests/utils/evolve.cpp b/tests/utils/evolve.cpp index 14df4de7f..38b43cc9c 100644 --- a/tests/utils/evolve.cpp +++ b/tests/utils/evolve.cpp @@ -180,6 
+180,13 @@ void multiplyReferenceOperator(qmatrix& state, qmatrix matrix) { state = matrix * state; } +void postMultiplyReferenceOperator(qmatrix& state, qmatrix matrix) { + DEMAND( state.size() == matrix.size() ); + + // we right-multiply upon density matrices only + state = state * matrix; +} + // overloads with ctrls, states and targs (given sub-operator) @@ -206,6 +213,12 @@ void multiplyReferenceOperator(qmatrix& state, vector ctrls, vector ct multiplyReferenceOperator(state, left); } +void postMultiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> ctrlStates, vector<int> targs, qmatrix matrix) { + + qmatrix left = getFullStateOperator(ctrls, ctrlStates, targs, matrix, getLog2(state.size())); + postMultiplyReferenceOperator(state, left); +} + // overloads with only ctrls and targs @@ -225,6 +238,10 @@ void multiplyReferenceOperator(qmatrix& state, vector ctrls, vector ta multiplyReferenceOperator(state, ctrls, {}, targs, matrix); } +void postMultiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix) { + + postMultiplyReferenceOperator(state, ctrls, {}, targs, matrix); +} // overloads with only targs @@ -245,6 +262,10 @@ void multiplyReferenceOperator(qmatrix& state, vector targs, qmatrix matrix multiplyReferenceOperator(state, {}, {}, targs, matrix); } +void postMultiplyReferenceOperator(qmatrix& state, vector<int> targs, qmatrix matrix) { + + postMultiplyReferenceOperator(state, {}, {}, targs, matrix); +} // overloads with only targs and kraus operators diff --git a/tests/utils/evolve.hpp b/tests/utils/evolve.hpp index e0b186e67..876130167 100644 --- a/tests/utils/evolve.hpp +++ b/tests/utils/evolve.hpp @@ -21,25 +21,29 @@ using std::vector; -void applyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); -void applyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); -void multiplyReferenceOperator(qvector& state, vector<int> ctrls, vector<int> states, vector<int>
targs, qmatrix matrix); -void multiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); - -void applyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); -void applyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); -void multiplyReferenceOperator(qvector& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); -void multiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); - -void applyReferenceOperator( qvector& state, vector<int> targs, qmatrix matrix); -void applyReferenceOperator( qmatrix& state, vector<int> targs, qmatrix matrix); -void multiplyReferenceOperator(qvector& state, vector<int> targs, qmatrix matrix); -void multiplyReferenceOperator(qmatrix& state, vector<int> targs, qmatrix matrix); - -void applyReferenceOperator( qvector& state, qmatrix matrix); -void applyReferenceOperator( qmatrix& state, qmatrix matrix); -void multiplyReferenceOperator(qvector& state, qmatrix matrix); -void multiplyReferenceOperator(qmatrix& state, qmatrix matrix); +void applyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); +void applyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); +void postMultiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> states, vector<int> targs, qmatrix matrix); + +void applyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); +void applyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qvector& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); +void
postMultiplyReferenceOperator(qmatrix& state, vector<int> ctrls, vector<int> targs, qmatrix matrix); + +void applyReferenceOperator( qvector& state, vector<int> targs, qmatrix matrix); +void applyReferenceOperator( qmatrix& state, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qvector& state, vector<int> targs, qmatrix matrix); +void multiplyReferenceOperator( qmatrix& state, vector<int> targs, qmatrix matrix); +void postMultiplyReferenceOperator(qmatrix& state, vector<int> targs, qmatrix matrix); + +void applyReferenceOperator( qvector& state, qmatrix matrix); +void applyReferenceOperator( qmatrix& state, qmatrix matrix); +void multiplyReferenceOperator( qvector& state, qmatrix matrix); +void multiplyReferenceOperator( qmatrix& state, qmatrix matrix); +void postMultiplyReferenceOperator(qmatrix& state, qmatrix matrix); void applyReferenceOperator(qmatrix& state, vector<int> targs, vector<qmatrix> matrices);