diff --git a/pyci/__init__.py b/pyci/__init__.py
index 36e9675..5fda201 100644
--- a/pyci/__init__.py
+++ b/pyci/__init__.py
@@ -18,7 +18,7 @@
 from .pyci import __version__, c_long, c_ulong, c_double, sparse_op
 
 from .pyci import secondquant_op, wavefunction, one_spin_wfn, two_spin_wfn
-from .pyci import doci_wfn, fullci_wfn, genci_wfn, sparse_op
+from .pyci import doci_wfn, fullci_wfn, genci_wfn, nonsingletci_wfn, sparse_op
 from .pyci import get_num_threads, set_num_threads, popcnt, ctz
 from .pyci import compute_overlap, compute_rdms, compute_transition_rdms
 from .pyci import add_hci, compute_enpt2
@@ -45,6 +45,7 @@
     "doci_wfn",
     "fullci_wfn",
     "genci_wfn",
+    "nonsingletci_wfn",
     "sparse_op",
     "get_num_threads",
     "set_num_threads",
diff --git a/pyci/fanci/__init__.py b/pyci/fanci/__init__.py
index 150f7e4..16a6a7c 100644
--- a/pyci/fanci/__init__.py
+++ b/pyci/fanci/__init__.py
@@ -10,6 +10,7 @@
     "AP1roG",
     "DetRatio",
     "pCCDS",
+    "AP1roGeneralizedSeno",
 ]
 
 
@@ -18,3 +19,4 @@
 from .ap1rog import AP1roG
 from .detratio import DetRatio
 from .pccds import pCCDS
+from .ap1rogen import AP1roGeneralizedSeno
diff --git a/pyci/fanci/ap1rogen.py b/pyci/fanci/ap1rogen.py
new file mode 100644
index 0000000..c1c8bdb
--- /dev/null
+++ b/pyci/fanci/ap1rogen.py
@@ -0,0 +1,195 @@
+r"""
+FanCI AP1roGeneralizedseno module for AP1roGSDGeneralized_sen-o wavefunction.
+
+"""
+
+from typing import Any, Union
+
+import numpy as np
+
+import pyci
+
+from ..pyci import AP1roGeneralizedSenoObjective
+from .fanci import FanCI
+import pdb
+
+
+__all___ = [
+    "AP1roGeneralizedSeno",
+]
+
+
+class AP1roGeneralizedSeno(FanCI):
+    r"""
+    DOC
+    """
+
+    def __init__(
+        self,
+        ham: pyci.hamiltonian,
+        nocc: int,
+        nproj: int = None,
+        wfn: pyci.nonsingletci_wfn = None,
+        fill: str = 'excitation',
+        **kwargs: Any,
+    ) -> None:
+        r"""
+        Initialize the FanCI problem.
+
+        Parameters
+        ----------
+        ham : pyci.hamiltonian
+            PyCI Hamiltonian.
+        nocc : int
+            Number of occupied orbitals.
+        nproj : int, optional
+            Number of determinants in projection ("P") space.
+        wfn : pyci.nonsingletci_wfn
+            If specified, this PyCI wave function defines the projection ("P") space.
+        fill : 
+        kwargs : Any, optional
+            Additional keyword arguments for base FanCI class.
+
+        """
+        # SEE OTHER FANCI WFNS
+
+        if not isinstance(ham, pyci.hamiltonian):
+            raise TypeError(f"Invalid `ham` type `{type(ham)}`; must be `pyci.hamiltonian`")
+
+        nparam = nocc * (ham.nbasis - nocc) + (2 * nocc) * (2 * (ham.nbasis - nocc)) + 1 #less params considering we added singles as well
+        nproj = nparam if nproj is None else nproj
+
+        # if nproj > nparam:
+        #     raise ValueError("nproj cannot be greater than the size of the space")
+
+        if wfn is None:
+            # wfn = pyci.doci_wfn(ham.nbasis, nocc, nocc)
+            # wfn.add_excited_dets(1) # add pair excited determinants
+            wfn = pyci.fullci_wfn(ham.nbasis, nocc, nocc)
+            # exc=0 ensures addint HF determinannt first
+            pyci.add_excitations(wfn,1,2,3,4)
+            print("Printing FCI wfn dets: ")
+            for i, sd in enumerate(wfn.to_occ_array()):
+                sd = np.array(sd)
+                print(wfn.to_det_array()[i],np.concatenate((sd[0],sd[1]+ham.nbasis)))
+            
+            wfn = pyci.nonsingletci_wfn(wfn)
+                   
+        elif not isinstance(wfn, pyci.nonsingletci_wfn):
+            raise TypeError(f"Invalid `wfn` type `{type(wfn)}`; must be `pyci.nonsingletci_wfn`")
+        elif wfn.nocc != nocc:
+            raise ValueError(f"wfn.nocc does not match `nocc={nocc}` parameter")
+
+
+        # Initialize base class
+        FanCI.__init__(self, ham, wfn, nproj, nparam, **kwargs)
+        print("Nonsingletci nbasis: ",wfn.nbasis)
+        print("\n\nPrinting nonsingletci wfn dets: ")
+        for sd in (self._sspace):
+            print(sd)
+            # sd = np.array(sd)
+            # print(np.concatenate((sd[0],sd[1]+ham.nbasis)))
+        print("Done printing\n\n")
+
+        # Assign reference occupations
+        #ref_occs_up = np.arange(nocc_up, dtype=pyci.c_long)
+        #ref_occs_dn = np.arange(nocc_dn, dtype=pyci.c_long)
+ 
+        # Save sub-class-specific attributes
+        #self._ref_occs = [ref_occs_up, ref_occs_dn]
+
+        # Initiazlize C++ extension
+        try:
+            norm_det = kwargs["norm_det"]
+            idx_det_cons = np.asarray([elem[0] for elem in norm_det], dtype=int)
+            det_cons = np.asarray([elem[1] for elem in norm_det], dtype=int)
+        except KeyError:
+            idx_det_cons = None
+            det_cons = None
+         
+        try:
+            norm_param = kwargs["norm_param"]
+            idx_param_cons = np.asarray([elem[0] for elem in norm_param], dtype=int)
+            param_cons = np.asarray([elem[1] for elem in norm_param], dtype=int)
+        except KeyError:
+            idx_param_cons = None
+            param_cons = None
+
+        self._cext = AP1roGeneralizedSenoObjective(
+            self._ci_op, self._wfn,
+            idx_det_cons=idx_det_cons, det_cons=det_cons,
+            idx_param_cons=idx_param_cons, param_cons=param_cons,
+        )
+
+    def compute_overlap(self, x: np.ndarray) -> np.ndarray:
+        r"""
+        Compute the FanCI overlap vector.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Parameter array, [p_0, p_1, ..., p_n].
+
+        Returns
+        -------
+        ovlp : np.ndarray
+            Overlap array.
+
+        """
+        return self._cext.overlap(x)
+
+    def compute_overlap_deriv(self, x: np.ndarray) -> np.ndarray:
+        r"""
+        Compute the FanCI overlap derivative matrix.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Parameter array, [p_0, p_1, ..., p_n].
+
+        Returns
+        -------
+        ovlp : np.ndarray
+            Overlap derivative array.
+
+        """
+        return self._cext.d_overlap(x)
+
+    def compute_objective(self, x: np.ndarray) -> np.ndarray:
+        r"""
+        Compute the FanCI objective function.
+
+            f : x[k] -> y[n]
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Parameter array, [p_0, p_1, ..., p_n, E].
+
+        Returns
+        -------
+        obj : np.ndarray
+            Objective vector.
+
+        """
+        return self._cext.objective(self._ci_op, x)
+
+    def compute_jacobian(self, x: np.ndarray) -> np.ndarray:
+        r"""
+        Compute the Jacobian of the FanCI objective function.
+
+            j : x[k] -> y[n, k]
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Parameter array, [p_0, p_1, ..., p_n, E].
+
+        Returns
+        -------
+        jac : np.ndarray
+            Jacobian matrix.
+
+        """
+        return self._cext.jacobian(self._ci_op, x)
+
diff --git a/pyci/fanci/fanci.py b/pyci/fanci/fanci.py
index 4d81ac7..1052693 100644
--- a/pyci/fanci/fanci.py
+++ b/pyci/fanci/fanci.py
@@ -13,6 +13,8 @@
 
 from scipy.optimize import OptimizeResult, least_squares, root
 
+from scipy.optimize import minimize
+
 import pyci
 
 
@@ -200,11 +202,28 @@ def __init__(
         wfn = fill_wavefunction(wfn, nproj, fill)
 
         # Compute CI matrix operator with nproj rows and len(wfn) columns
-        ci_op = pyci.sparse_op(ham, wfn, nrow=nproj, ncol=len(wfn), symmetric=False)
+        if type(wfn).__name__ == "nonsingletci_wfn":
+            ci_op = pyci.sparse_op(ham, wfn, nrow=nproj, ncol=len(wfn), symmetric=False, wfntype="nonsingletci")
+        else:
+            ci_op = pyci.sparse_op(ham, wfn, nrow=nproj, ncol=len(wfn), symmetric=False)
 
         # Compute arrays of occupations
         sspace = wfn.to_occ_array()
+
+        # if type(wfn).__name__ == "nonsingletci_wfn":
+        #     pspace = sspace[np.r_[0:13, 21:29, 37:45, 53:60, 66:72, 75:77, 81, 99:102, 104:108, 109:117, 126:129, 131:135, 
+        #               136:144, 171:183, 189:195, 201:206, 209, 221:224, 225:231, 2316:239, 240:246, 261:267, 
+        #               273:278, 282:286, 288, 300:302, 303:309, 314:316, 317:323, 337:344, 348:352, 356:359, 
+        #               365:369, 371:375, 381:384, 388:391, 393:395, 400:403, 405:408, 413:416, 418:420, 422, 
+        #               424:426, 427, 430]]
+        #     # nproj = len(pspace)
+        # else:
         pspace = sspace[:nproj]
+        print("len(sspace), len(pspace): ", len(sspace), len(pspace))
+        # dec_sspace = wfn.to_det_array()
+        # print("Printing sspace")
+        # for dec, bin in zip(dec_sspace, sspace):
+        #     print("\n", dec, bin)
 
         # Assign attributes to instance
         self._nequation = nequation
@@ -271,6 +290,10 @@ def optimize(
         elif mode == "root":
             opt_args = f, x0
             optimizer = root
+        # FIXME: For BFGS to work, objective function must return a scalar value
+        elif mode == "bfgs":
+            opt_args = f, x0
+            optimizer = minimize
         else:
             raise ValueError("invalid mode parameter")
 
diff --git a/pyci/include/pyci.h b/pyci/include/pyci.h
index 826195c..982482b 100644
--- a/pyci/include/pyci.h
+++ b/pyci/include/pyci.h
@@ -28,6 +28,7 @@
 #include <thread>
 #include <utility>
 #include <vector>
+#include <variant>
 
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE long
 #include <Eigen/Core>
@@ -69,6 +70,30 @@
 #define PYCI_CHUNKSIZE_MIN 1024
 #endif
 
+// Specialize std::hash for std::vector<int>
+namespace std {
+    template <>
+    struct hash<std::vector<int>> {
+        int operator()(const std::vector<int>& vec) const {
+            int seed = vec.size();
+            for (auto& i : vec) {
+                seed ^= i + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+            }
+            return seed;
+        }
+    };
+
+    template <>
+    struct hash<std::pair<std::vector<int>, std::vector<int>>> {
+        int operator()(const std::pair<std::vector<int>, std::vector<int>>& p) const {
+            int seed = hash<std::vector<int>>()(p.first);
+            seed ^= hash<std::vector<int>>()(p.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+            return seed;
+        }
+    };
+}
+
+
 namespace pyci {
 
 /* Integer types, popcnt and ctz functions. */
@@ -173,6 +198,7 @@ struct TwoSpinWfn;
 struct DOCIWfn;
 struct FullCIWfn;
 struct GenCIWfn;
+struct NonSingletCI;
 struct SparseOp;
 
 /* Number of threads global variable. */
@@ -281,8 +307,8 @@ double py_compute_enpt2(const SQuantOp &, const WfnType &, const Array<double>,
 struct SQuantOp final {
 public:
     long nbasis;
-    double ecore, *one_mo, *two_mo, *h, *v, *w;
-    Array<double> one_mo_array, two_mo_array, h_array, v_array, w_array;
+    double ecore, *one_mo, *two_mo,  *one_ao, *two_ao, *h, *v, *w;
+    Array<double> one_mo_array, two_mo_array, one_ao_array, two_ao_array, h_array, v_array, w_array;
 
     SQuantOp(void);
 
@@ -571,7 +597,7 @@ struct FullCIWfn final : public TwoSpinWfn {
     FullCIWfn(const long, const long, const long, const Array<long>);
 };
 
-struct GenCIWfn final : public OneSpinWfn {
+struct GenCIWfn : public OneSpinWfn {
 public:
     using Wfn::maxrank_dn;
     using Wfn::maxrank_up;
@@ -612,6 +638,82 @@ struct GenCIWfn final : public OneSpinWfn {
     GenCIWfn(const long, const long, const long, const Array<long>);
 };
 
+// Define structure to store det and excitation details
+struct DetExcParamIndx {
+    AlignedVector<ulong> det;
+    std::vector<long> pair_inds;
+    std::vector<long> single_inds;
+    int sign;
+};
+
+// Inline utility function for resizing DetExcParamIndx Object
+inline void ensure_struct_size(std::vector<DetExcParamIndx>& vec, const long size) {
+    if (vec.size() < static_cast<std::vector<DetExcParamIndx>::size_type>(size)) {
+        vec.resize(size);
+    }
+}
+
+struct NonSingletCI final : public GenCIWfn {
+public:
+    using Wfn::maxrank_dn;
+    using Wfn::maxrank_up;
+    using Wfn::nbasis;
+    using Wfn::ndet;
+    using Wfn::nocc;
+    using Wfn::nocc_dn;
+    using Wfn::nocc_up;
+    using Wfn::nvir;
+    using Wfn::nvir_dn;
+    using Wfn::nvir_up;
+    using Wfn::nword;
+    using Wfn::nword2;
+
+protected:
+    using Wfn::dets;
+    using Wfn::dict;
+
+public:
+    NonSingletCI(const NonSingletCI &);
+
+    NonSingletCI(NonSingletCI &&) noexcept;
+
+    NonSingletCI(const DOCIWfn &);
+
+    NonSingletCI(const FullCIWfn &);
+
+    NonSingletCI(const std::string &);
+
+    NonSingletCI(const long, const long, const long);
+
+    NonSingletCI(const long, const long, const long, const long, const ulong *);
+
+    NonSingletCI(const long, const long, const long, const long, const long *);
+
+    NonSingletCI(const long, const long, const long, const Array<ulong>);
+
+    NonSingletCI(const long, const long, const long, const Array<long>);
+
+    std::vector<std::vector<long>> generate_combinations(std::size_t, std::size_t);
+
+    std::vector<std::vector<long>> generate_cartesian_product(const AlignedVector<std::pair<int, int>>&, std::size_t);
+
+    void fill_hartreefock_det(long, long, ulong *) const;
+
+    void add_excited_dets(const ulong *, const long);
+
+    long py_add_excited_dets(const long, const pybind11::object);
+
+    long calc_sindex(const long occ, const long vir) const;
+
+    long calc_pindex(const long occ, const long vir) const;
+    
+    template <typename T>
+    void print_vector(const std::string&, const AlignedVector<T>& );
+
+    void print_pairs(const std::string&, const AlignedVector<std::pair<int, int>>&);
+
+};
+
 /* Sparse matrix operator class. */
 
 struct SparseOp final {
@@ -638,6 +740,8 @@ struct SparseOp final {
 
     SparseOp(const SQuantOp &, const GenCIWfn &, const long, const long, const bool);
 
+    SparseOp(const SQuantOp &, const NonSingletCI &, const long, const long, const bool, const std::string);
+
     pybind11::object dtype(void) const;
 
     const double *data_ptr(const long) const;
@@ -655,6 +759,8 @@ struct SparseOp final {
     void solve_ci(const long, const double *, const long, const long, const double, double *,
                   double *) const;
 
+    void update(const SQuantOp &, const NonSingletCI &, const long , const long, const long);
+    
     template<class WfnType>
     void update(const SQuantOp &, const WfnType &, const long, const long, const long);
 
@@ -672,6 +778,9 @@ struct SparseOp final {
     template<class WfnType>
     void py_update(const SQuantOp &, const WfnType &);
 
+    void py_update(const SQuantOp &, const NonSingletCI &); //, const long, const long, const long);
+
+
 private:
     void sort_row(const long);
 
@@ -680,6 +789,8 @@ struct SparseOp final {
     void add_row(const SQuantOp &, const FullCIWfn &, const long, ulong *, long *, long *);
 
     void add_row(const SQuantOp &, const GenCIWfn &, const long, ulong *, long *, long *);
+
+    void add_row(const SQuantOp &, const NonSingletCI &, const long, ulong *, long *, long *);
 };
 
 /* FanCI objective classes. */
@@ -799,4 +910,207 @@ class APIGObjective : public Objective<DOCIWfn> {
     virtual void d_overlap(const size_t, const double *x, double *y);
 };
 
+
+// Specialize base template class for AP1roGSDGeneralized_sen-o against GenCI Wfn
+class AP1roGeneralizedSenoObjective : public Objective<NonSingletCI> {
+public:
+    using Objective<NonSingletCI>::nproj;  // # of determinants in P space
+    using Objective<NonSingletCI>::nconn;  // # of determinants in S space
+    using Objective<NonSingletCI>::nparam; // # of FanCI parameters
+    using Objective<NonSingletCI>::ovlp;   // Overlap vector
+    using Objective<NonSingletCI>::d_ovlp; // Overlap gradient matrix
+    
+    using Partition = std::vector<int>; // Type alias for a partition
+    using CacheKey = std::pair<int, int>; // Key type for cache
+    // Custom hash function for CacheKey
+    struct CacheKeyHash {
+        std::size_t operator()(const CacheKey& key) const {
+            return std::hash<int>()(key.first) ^ (std::hash<int>()(key.second) << 1);
+        }
+    };
+    // Define the cache type
+    using Cache = std::unordered_map<CacheKey, std::vector<Partition>, CacheKeyHash>;
+
+
+    std::size_t nbasis;
+    std::size_t nocc;
+    std::size_t ndet;
+    std::size_t nword;
+
+
+    struct HashVector {
+        long operator()(const std::vector<long>& v) const {
+            long hash = 0;
+            for (long i : v) {
+                hash ^= std::hash<long>()(i) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
+            }
+            return hash;
+        }
+    };
+
+    // Custom equality function for std::vector<int>
+    struct VectorEqual {
+        bool operator()(const std::vector<long>& v1, const std::vector<long>& v2) const {
+            return v1 == v2;
+        }
+    };
+    
+
+
+    // std::unordered_map<std::vector<int>, int> exops;
+    std::unordered_map<std::vector<long>, int, HashVector, VectorEqual> exops;
+    std::vector<std::vector<long>> ind_exops;
+    std::vector<int> ranks;
+    std::vector<DetExcParamIndx> det_exc_param_indx; // Det and excitation details
+    std::vector<DetExcParamIndx> det_ac_inds; // Det, annhilation and creation indices, sign
+    std::vector<std::size_t> nexc_list;
+    const NonSingletCI& wfn_; // Member variable to store the wavefunction
+    
+    
+    
+    // struct pair_hash {
+    //     template <class T1, class T2>
+    //     std::size_t operator()(const std::pair<T1, T2>& p) const {
+    //         auto hash1 = std::hash<T1>{}(p.first);
+    //         auto hash2 = std::hash<T2>{}(p.second);
+    //         return hash1 ^ (hash2 << 1); // Combine the two hash values
+    //     }
+    // };
+    // Custom hash function for std::vector<long unsigned int>
+    struct vector_hash {
+        std::size_t operator()(const std::vector<std::size_t>& v) const {
+            std::size_t hash = 0;
+            for (auto& i : v) {
+                hash ^= std::hash<std::size_t>{}(i) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
+            }
+            return hash;
+        }
+    };
+    // Custom hash function for std::pair<std::vector<long unsigned int>, std::vector<long unsigned int>>
+    struct pair_hash {
+        std::size_t operator()(const std::pair<std::vector<std::size_t>, std::vector<std::size_t>>& p) const {
+            auto hash1 = vector_hash{}(p.first);
+            auto hash2 = vector_hash{}(p.second);
+            return hash1 ^ (hash2 << 1); // Combine the two hash values
+        }
+    };
+    
+
+
+    std::unordered_map<std::pair<std::vector<std::size_t>, std::vector<std::size_t>>, std::unordered_map<int, std::vector<std::vector<int>>>, pair_hash> exop_combs;
+
+    std::vector<double> s_permanent;
+    std::vector<double> p_permanent;
+    
+    // C++ constructor
+    AP1roGeneralizedSenoObjective(const SparseOp &, const NonSingletCI &,
+                         const std::size_t = 0UL, const long * = nullptr, const double * = nullptr,
+                         const std::size_t = 0UL, const long * = nullptr, const double * = nullptr);
+
+    // Python constructor
+    AP1roGeneralizedSenoObjective(const SparseOp &, const NonSingletCI &,
+                         const pybind11::object, const pybind11::object,
+                         const pybind11::object, const pybind11::object);
+
+    // C++ copy constructor
+    AP1roGeneralizedSenoObjective(const AP1roGeneralizedSenoObjective &);
+
+    // C++ move constructor
+    AP1roGeneralizedSenoObjective(AP1roGeneralizedSenoObjective &&) noexcept;
+
+    // Function to set attributes from wfn
+    void set_attributes_from_wfn(const NonSingletCI & );
+
+    // Generate combinations of pairs and singles
+    template <typename T>
+    void generate_combinations(const std::vector<T>&, int , std::vector<std::vector<T>>&, long );
+
+    // Generate partitions
+    std::vector<std::pair<int, int>> generate_partitions(int , int, bool);
+
+    // Generate excitations
+    void generate_excitations(const std::vector<std::size_t>& ,
+                            const std::vector<std::size_t>& , int , std::vector<long>& ,
+                            std::vector<long>&, long, const NonSingletCI &);
+
+    
+    // std::variant<double, std::vector<double>>  
+    double product_amplitudes_multi(
+            const std::unordered_map<int, std::vector<std::vector<int>>>&,
+            bool, const double*);
+
+        
+    std::vector<double> product_ampli_multi_deriv(
+        const std::unordered_map<int, std::vector<std::vector<int>>>& ,
+        bool , const double* );
+    
+    // Return the signature of applying annihilators then creators to the Slater determinant.
+    int sign_swap(AlignedVector<ulong> , long, long);
+
+    // Return the sign of the Slater determinant after applying excitation operators.
+    int sign_excite(AlignedVector<ulong> , const std::vector<std::size_t>& , const std::vector<std::size_t>& );
+
+
+    // Initializer for {d_,}overlap variables
+    void init_overlap(const NonSingletCI &);
+
+    // Permanent calculation: Ryser's Algorithm
+    bool permanent_calculation(const std::vector<long>&, const double*, double&);
+
+    // Helper function for d_overlap
+    double compute_derivative(const std::vector<long> , const double*, std::size_t);
+
+    // Overlap function
+    virtual void overlap(const std::size_t, const double*, double*);
+
+    // Overlap gradient function
+    virtual void d_overlap(const size_t, const double*, double*);
+
+    // New methods
+    void printPartitions(const std::vector<Partition>&);
+    
+    std::vector<Partition> findPartitions(const std::vector<int>&, int, int, Cache&);
+    
+    std::vector<Partition> intPartitionRecursive(const std::vector<int>&, int, int);
+    
+    // template <typename T>
+    void generateCombinations(const std::vector<std::size_t>, std::vector<int>::size_type, int, std::vector<int>&, std::vector<std::vector<int>>&);
+    
+    // std::unordered_map<std::vector<int>, int> assign_exops(const std::vector<int>&, int); //, const std::vector<std::vector<std::size_t>>*);
+    std::unordered_map<std::vector<long>, int, HashVector, VectorEqual> assign_exops(); //const long, const long, const long);
+
+    void assign_and_sort_exops(const std::vector<int>& , int); //, const std::vector<std::vector<std::size_t>>* );
+    
+    // void helper_generate_partitions(const std::vector<std::size_t>, std::vector<std::pair<int, int>>&, std::vector<std::pair<int, int>>::size_type, std::vector<std::vector<int>>&, std::vector<std::vector<std::vector<int>>>&);
+    void get_unordered_partition(std::vector<int>&, std::vector<std::pair<int, int>>& , 
+                                  std::vector<std::vector<int>>& , int , 
+                                  std::vector<std::vector<std::vector<int>>>& );     
+
+    std::vector<std::vector<std::vector<int>>> generate_unordered_partitions(std::vector<int>, std::vector<std::pair<int, int>>);
+    
+    std::unordered_map<int, std::vector<std::vector<int>>> group_by_size(const std::vector<std::vector<int>>&);
+    
+    template <typename T>
+    std::vector<std::vector<T>>  generate_perms(const std::vector<T> & vec) ;
+    // std::vector<std::vector<T>> generate_perms(const std::vector<T>&, std::vector<T>&, std::vector<bool>&);
+    
+    std::vector<std::pair<std::vector<int>, std::vector<int>>> combine_pairs(const std::vector<std::vector<int>>&, const std::vector<std::vector<int>>&);    
+    
+    int sign_perm(std::vector<std::size_t>, const std::vector<std::size_t>, bool);
+    
+    template <typename T>
+    T choose_dtype(size_t);
+
+    template <typename T>
+    std::vector<std::vector<std::pair<std::vector<T>, std::vector<T>>>> cartesian_product(const std::vector<std::pair<std::vector<std::vector<T>>, std::vector<std::vector<T>>>>& );
+    // std::vector<std::vector<T>> cartesian_product(const std::vector<std::vector<T>>& );
+    
+    void generate_possible_exops(const std::vector<std::size_t>&, const std::vector<std::size_t>&); 
+    //, std::vector<int>&, std::unordered_map<std::pair<std::vector<int>, std::vector<int>>, std::vector<uint64_t>>&);//std::vector<std::vector<uint64_t>>&);
+    
+    
+};
+
 } // namespace pyci
+
+
diff --git a/pyci/src/ap1rog.cpp b/pyci/src/ap1rog.cpp
index 93f1138..f770811 100644
--- a/pyci/src/ap1rog.cpp
+++ b/pyci/src/ap1rog.cpp
@@ -74,18 +74,21 @@ void AP1roGObjective::init_overlap(const DOCIWfn &wfn_)
         const ulong *det = wfn_.det_ptr(idet);
         ulong word, hword, pword;
         std::size_t h, p, nexc = 0;
+        
         for (std::size_t iword = 0; iword != nword; ++iword) {
             word = rdet[iword] ^ det[iword];
             hword = word & rdet[iword];
             pword = word & det[iword];
+
             while (hword) {
                 h = Ctz(hword);
                 p = Ctz(pword);
                 hole_list[idet * wfn_.nocc_up + nexc] = h + iword * Size<ulong>();
                 part_list[idet * wfn_.nocc_up + nexc] = p + iword * Size<ulong>() - wfn_.nocc_up;
+                
                 hword &= ~(1UL << h);
                 pword &= ~(1UL << p);
-                ++nexc;
+                ++nexc;         
             }
         }
         nexc_list[idet] = nexc;
diff --git a/pyci/src/ap1rogen.cpp b/pyci/src/ap1rogen.cpp
new file mode 100644
index 0000000..45037f2
--- /dev/null
+++ b/pyci/src/ap1rogen.cpp
@@ -0,0 +1,2239 @@
+/* This file is part of PyCI.
+ *
+ * PyCI is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * PyCI is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <pyci.h>
+#include <set>
+#include <iostream>
+#include <algorithm>
+#include <unordered_map>
+#include <cmath>
+#include <cstdint>
+#include <map>
+#include <tuple>
+
+namespace pyci {
+
+// Constructor with Raw Pointers
+
+// call to the base class constructor fullciwfn with the provided parameters
+AP1roGeneralizedSenoObjective::AP1roGeneralizedSenoObjective(const SparseOp &op_, const NonSingletCI &wfn_,
+                                 const std::size_t n_detcons_,
+                                 const long *idx_detcons_,
+                                 const double *val_detcons_,
+                                 const std::size_t n_paramcons_,
+                                 const long *idx_paramcons_,
+                                 const double *val_paramcons_)
+: Objective<NonSingletCI>::Objective(op_, wfn_, n_detcons_, idx_detcons_, val_detcons_, n_paramcons_, idx_paramcons_, val_paramcons_), wfn_(wfn_)
+{
+    // init_overlap(wfn_); 
+    ranks = {1, 2};
+    set_attributes_from_wfn(wfn_);
+    exops = assign_exops(); //wfn_.nbasis, wfn_.nocc, wfn_.nword);
+    // assign_and_sort_exops(ranks, wfn_.nbasis);
+    nparam = nocc / 2 * (nbasis/2 - nocc / 2); //paired-doubles
+    nparam += nocc * (nbasis - nocc); // alpha, beta singles
+
+    ovlp.resize(nconn);
+    d_ovlp.resize(nconn * nparam);
+}
+// call to initizlize the overlap related data
+
+// Constructor with pybind11 objects
+AP1roGeneralizedSenoObjective::AP1roGeneralizedSenoObjective(const SparseOp &op_, const NonSingletCI &wfn_,
+                                 const pybind11::object idx_detcons_,
+                                 const pybind11::object val_detcons_,
+                                 const pybind11::object idx_paramcons_,
+                                 const pybind11::object val_paramcons_)
+: Objective<NonSingletCI>::Objective(op_, wfn_, idx_detcons_, val_detcons_, idx_paramcons_, val_paramcons_), wfn_(wfn_)
+{
+    // init_overlap(wfn_);
+    ranks = {1, 2};
+    set_attributes_from_wfn(wfn_);
+    exops = assign_exops(); //wfn_.nbasis, wfn_.nocc, wfn_.nword);
+    // assign_and_sort_exops(ranks, wfn_.nbasis);
+    nparam = nocc / 2 * (nbasis/2 - nocc / 2); //paired-doubles
+    nparam += nocc * (nbasis - nocc); // alpha, beta singles
+
+    ovlp.resize(nconn);
+    d_ovlp.resize(nconn * nparam);
+}
+// Copy Constructor
+// obj is the constant reference to another object to be copied
+AP1roGeneralizedSenoObjective::AP1roGeneralizedSenoObjective(const AP1roGeneralizedSenoObjective &obj)
+: Objective<NonSingletCI>::Objective(obj), exops(obj.exops), ind_exops(obj.ind_exops), ranks(obj.ranks), det_exc_param_indx(obj.det_exc_param_indx), nexc_list(obj.nexc_list), wfn_(obj.wfn_)
+{
+    return;
+}
+
+// Move constructor
+// obj is the rvalue reference to another object to be moved
+AP1roGeneralizedSenoObjective::AP1roGeneralizedSenoObjective(AP1roGeneralizedSenoObjective &&obj) noexcept
+: Objective<NonSingletCI>::Objective(obj), exops(std::move(obj.exops)), ind_exops(std::move(obj.ind_exops)), ranks(std::move(obj.ranks)), det_exc_param_indx(obj.det_exc_param_indx), nexc_list(std::move(obj.nexc_list)), wfn_(obj.wfn_)
+{
+    return;
+}
+
+// Function to set attributes from wfn
+void AP1roGeneralizedSenoObjective::set_attributes_from_wfn(const NonSingletCI & wfn) {
+    nbasis = wfn.nbasis;
+    nocc = wfn.nocc;
+    ndet = wfn.ndet;
+    nword = wfn.nword;
+
+    // std::cout << "nbasis: " << nbasis << ", nocc: " << nocc << ", ndet: " << ndet << ", nword: " << nword << std::endl;
+    // // Print the types of the attributes
+    // std::cout << "Type of wfn.nbasis: " << typeid(decltype(wfn.nbasis)).name() << std::endl;
+    // std::cout << "Type of wfn.nocc: " << typeid(decltype(wfn.nocc)).name() << std::endl;
+    // std::cout << "Type of wfn.ndet: " << typeid(decltype(wfn.ndet)).name() << std::endl;
+    // std::cout << "Type of wfn.nword: " << typeid(decltype(wfn.nword)).name() << std::endl;
+}
+
+std::unordered_map<std::vector<long>, int, AP1roGeneralizedSenoObjective::HashVector, 
+        AP1roGeneralizedSenoObjective::VectorEqual> AP1roGeneralizedSenoObjective::assign_exops(){
+    //const long nbasis, const long nocc, const long nword){
+    
+    int counter = 0;
+    // std::cout << "nocc: " << nocc << ", nbasis: " << nbasis << std::endl;
+    //  Occupied indices of HF reference det
+    AlignedVector<ulong> rdet(nword);
+    wfn_.fill_hartreefock_det(nbasis, nocc, &rdet[0]);
+    AlignedVector<long> ex_from(nocc);
+    fill_occs(nword, rdet.data(), &ex_from[0]);
+
+    AlignedVector<long> ex_to;
+    for (std::size_t i = 0; i < nbasis; ++i) {
+        if (std::find(ex_from.begin(), ex_from.end(), i) == ex_from.end()) {
+            ex_to.push_back(i);
+        }
+    }
+
+    // std::cout << "****Ex_from: ";
+    // for (long i : ex_from) {
+    //     std::cout << i << " ";
+    // }
+    // std::cout << std::endl;
+    // std::cout << "****Ex_to: ";
+    // for (long i : ex_to) {
+    //     std::cout << i << " ";
+    // }
+    // std::cout << std::endl;
+
+    // Generate Single excitations: non-spin-preserving
+    for (size_t i = 0; i < ex_from.size(); ++i) {
+        long occ_alpha = ex_from[i];
+        for (long vir : ex_to) {
+            std::vector<long> exop = {occ_alpha, vir};
+            exops[exop] = counter++;
+        }
+    }
+
+    long nspatial = nbasis / 2;
+
+    // Generate paired double excitations
+    for (size_t i = 0; i < ex_from.size() / 2; ++i) {
+        long occ_alpha = ex_from[i];
+        for (size_t a = 0; a < ex_to.size() / 2; ++a) {
+            long vir_alpha = ex_to[a];
+            std::vector<long> exop = {occ_alpha, occ_alpha + nspatial, vir_alpha, vir_alpha + nspatial};
+            exops[exop] = counter++;            
+        }
+    }
+
+    // std::cout << "****Exops: ";
+    // for (const auto& exop : exops) {
+    //     std::cout << "{ ";
+    //     for (long i : exop.first) {
+    //         std::cout << i << " ";
+    //     }
+    //     std::cout << "} : " << exop.second << std::endl;
+    // }
+
+
+    // Create a vector of keys from the exops map
+    for (const auto& pair : exops) {
+        ind_exops.push_back(pair.first);
+    }
+
+    // Sort the vector of keys based on the values in the exops map
+    std::sort(ind_exops.begin(), ind_exops.end(), [this](const std::vector<long>& a, const std::vector<long>& b) {
+        return exops[a] < exops[b];
+    });
+
+    return exops;
+}
+
+
+
+
+
+// Helper function to print the result
+void AP1roGeneralizedSenoObjective::printPartitions(const std::vector<Partition>& partitions) {
+    for (const auto& partition : partitions) {
+        std::cout << "{ ";
+        for (int coin : partition) {
+            std::cout << coin << " ";
+        }
+        std::cout << "}\n";
+    }   
+}
+
+// Recursive function to find all partitions
+std::vector<AP1roGeneralizedSenoObjective::Partition> AP1roGeneralizedSenoObjective::findPartitions(const std::vector<int>& coins, int numCoinTypes, int total, Cache& cache) {
+    // std::cout << "\nCoins.size(): " << coins.size() << std::endl;
+    // std::cout << "numCoinTypes: " << numCoinTypes << std::endl;
+    // std::cout << "total: " << total << std::endl;
+    
+    if (total == 0) {
+        return {{}};
+    }   
+
+    if (total < 0 || numCoinTypes <= 0) {
+        return {}; 
+    }   
+
+    CacheKey key = {numCoinTypes, total};
+    if (cache.find(key) != cache.end()) {
+        return cache[key];
+    }   
+
+    std::vector<Partition> result;
+
+    // Include the last coin
+    for (auto& partition : findPartitions(coins, numCoinTypes, total - coins[numCoinTypes - 1], cache)) {
+        partition.push_back(coins[numCoinTypes - 1]);
+        result.push_back(partition);
+    }   
+
+    // Exclude the last coin
+    auto excludeLast = findPartitions(coins, numCoinTypes - 1, total, cache);
+    result.insert(result.end(), excludeLast.begin(), excludeLast.end());
+
+    cache[key] = result;
+    // std::cout << "Cache size: " << cache.size() << std::endl;
+    // std::cout << "Key: " << key.first << " " << key.second << std::endl;
+    // std::cout << "Result size: " << result.size() << std::endl;
+    return result;
+}
+
+// Main function to handle the partitions
+std::vector<AP1roGeneralizedSenoObjective::Partition> AP1roGeneralizedSenoObjective::intPartitionRecursive(const std::vector<int>& coins, int numCoinTypes, int total) {
+    // std::cout << "IntPartitionRecur: \nCoins: ";
+    // for (int coin : coins) {
+    //     std::cout << coin << " ";
+    // }
+    // std::cout << std::endl;
+
+    Cache cache;
+    auto result = findPartitions(coins, numCoinTypes, total, cache);
+
+    // Sort each partition to ensure consistency
+    for (auto& partition : result) {
+        std::sort(partition.begin(), partition.end());
+    }
+
+    // Remove duplicates (optional if input guarantees no duplicates)
+    std::sort(result.begin(), result.end());
+    result.erase(std::unique(result.begin(), result.end()), result.end());
+
+    // std::cout << "Number of partitions: " << result.size() << std::endl;
+    // std::cout << "Partitions: ";
+    // printPartitions(result);
+    
+    return result;
+}
+
+
+
+void AP1roGeneralizedSenoObjective::generateCombinations(const std::vector<std::size_t> collection, std::vector<int>::size_type bin_size, int start, 
+    std::vector<int>& current_comb, std::vector<std::vector<int>>& results) {
+    if (current_comb.size() == bin_size) {
+        results.push_back(current_comb);
+        return;
+    }
+
+    for (std::vector<int>::size_type i = start; i < collection.size(); ++i) {
+        current_comb.push_back(collection[i]);
+        generateCombinations(collection, bin_size, i + 1, current_comb, results);
+        current_comb.pop_back();
+    }
+}
+
+
+// Function to generate unordered partitions
+void AP1roGeneralizedSenoObjective::get_unordered_partition(std::vector<int>& collection, std::vector<std::pair<int, int>>& bin_size_num, 
+                                  std::vector<std::vector<int>>& current_partition, int index, 
+                                  std::vector<std::vector<std::vector<int>>>& results) {
+   
+    if (index == 0) {
+        results.push_back(current_partition);
+        return;
+    }
+
+    int last = collection[index-1];
+    // std::cout << "Last element from collection: " << last << std::endl;
+
+    std::vector<std::vector<std::vector<int>>> prev_partitions;
+    get_unordered_partition(collection, bin_size_num, current_partition, index - 1, prev_partitions);
+
+    for (const auto& prev_partition : prev_partitions) {
+        // std::cout << "Previous partition: ";
+        // print_current_partition(prev_partition);
+
+        int ind_bin = -1;
+        int ind_size = 0;
+        int ind_bin_size = -1;
+
+        while (ind_size < static_cast<int>(bin_size_num.size())) {
+            ind_bin += 1;
+            ind_bin_size += 1;
+
+            if (ind_bin_size == bin_size_num[ind_size].second) {
+                ind_size += 1;
+                ind_bin_size = 0;
+                if (ind_size == static_cast<int>(bin_size_num.size())) {
+                    break;
+                }
+            }
+
+            std::vector<int> subset = prev_partition[ind_bin];
+            // std::cout << "Selected subset: ";
+            // for (int num : subset) std::cout << num << " ";
+            // std::cout << std::endl;
+
+            if (subset.empty()) {
+                std::vector<std::vector<int>> new_partition = prev_partition;
+                new_partition[ind_bin].push_back(last);
+                results.push_back(new_partition);
+
+                if (bin_size_num[ind_size].second > 1) {
+                    ind_bin += bin_size_num[ind_size].second - ind_bin_size - 1;
+                    ind_size += 1;
+                    ind_bin_size = -1;
+                }
+                continue;
+            }
+
+            if (!(subset.back() < last)) {
+                // std::cout << "Skipping because " << subset.back() << " is not less than " << last << std::endl;
+                continue;
+            }
+
+            if (!(static_cast<int>(subset.size()) < bin_size_num[ind_size].first)) {
+                // std::cout << "Skipping because subset length " << subset.size() << " exceeds limit " << bin_size_num[ind_size].first << std::endl;
+                continue;
+            }
+
+            std::vector<std::vector<int>> new_partition = prev_partition;
+            new_partition[ind_bin].push_back(last);
+            results.push_back(new_partition);
+        }
+    }
+}
+
+
+std::vector<std::vector<std::vector<int>>> AP1roGeneralizedSenoObjective::generate_unordered_partitions(std::vector<int> collection, 
+    std::vector<std::pair<int, int>> bin_size_num) {
+    // std::cout << "Entering in generate unordered partition\n" ;
+    std::vector<std::vector<std::vector<int>>> results;
+
+    // Compute total number of bins
+    int total_bins = 0;
+    for (auto& bin : bin_size_num) total_bins += bin.second;
+    // std::cout << "Total bins: " << total_bins << std::endl;
+
+    std::vector<std::vector<int>> current_partition(total_bins); // Initialize correct number of bins
+    std::sort(collection.begin(), collection.end()); // Ensure input order
+    get_unordered_partition(collection, bin_size_num, current_partition, collection.size(), results);
+
+    return results;
+}
+
+
+
+// Grouping By Size
+std::unordered_map<int, std::vector<std::vector<int>>> AP1roGeneralizedSenoObjective::group_by_size(
+    const std::vector<std::vector<int>>& partitions) {
+    // std::cout << "\nInside group_by_size\n";
+    std::unordered_map<int, std::vector<std::vector<int>>> grouped_exops;
+    for (const auto& part : partitions) {
+        // std::cout << "Part size: " << part.size() << std::endl;
+        grouped_exops[part.size()].push_back(part);
+    }
+    // print_grouped_exops(grouped_exops);
+    // std::cout << "groups by size: " ;
+    // for (const auto& group : grouped_exops) {
+    //     std::cout << "Group size: " << group.first << std::endl;
+    //     for (const auto& part : group.second) {
+    //         std::cout << "{ ";
+    //         for (int i : part) {
+    //             std::cout << i << " ";
+    //         }
+    //         std::cout << "}\n";
+    //     }
+    // }
+
+    return grouped_exops;
+}
+
+// // Generating Permutations
+// template <typename T>
+// std::vector<std::vector<T>> AP1roGeneralizedSenoObjective::generate_perms(
+//     const std::vector<T>& elements, 
+//     std::vector<T>& current, 
+//     std::vector<bool>& used) {
+//     std::vector<std::vector<T>> result;
+//     if (current.size() == elements.size()) {
+//         result.push_back(current);
+//         return result;
+//     }
+//     for (size_t i = 0; i < elements.size(); ++i) {
+//         if (used[i]) continue;
+//         used[i] = true;
+//         current.push_back(elements[i]);
+//         auto perms = generate_perms(elements, current, used);
+//         result.insert(result.end(), perms.begin(), perms.end());
+//         current.pop_back();
+//         used[i] = false;
+//     }
+
+//     return result;
+// }
+
+// Helper function to generate permutations
+template <typename T>
+std::vector<std::vector<T>> AP1roGeneralizedSenoObjective::generate_perms(const std::vector<T> & vec) {
+    std::vector<std::vector<T>> perms;
+    std::vector<T> temp = vec;
+    do {
+        perms.push_back(temp);
+    } while (std::next_permutation(temp.begin(), temp.end()));
+    
+    // std::cout << "Permutations: ";
+    // for (const auto& perm : perms) {
+    //     std::cout << "{ ";
+    //     for (T i : perm) {
+    //         std::cout << i << " ";
+    //     }
+    //     std::cout << "}\n";
+    // }
+
+    return perms;
+}
+
+// Combining Paired Permutations
+// combine annihilation and creation operators of the same size
+// std::vector<std::pair<std::pair<std::vector<int>, std::vector<int>>> 
+std::vector<std::pair<std::vector<int>, std::vector<int>>> AP1roGeneralizedSenoObjective::combine_pairs(
+                                            const std::vector<std::vector<int>>& annhs, 
+                                            const std::vector<std::vector<int>>& creas) {
+    std::vector<std::pair<std::vector<int>, std::vector<int>>> combined_perms;
+    for (const auto& annh : annhs) {
+        for (const auto& crea : creas) {
+            combined_perms.emplace_back(annh, crea);
+        }
+    }
+    return combined_perms;
+}
+
+
+// Computes the sign of a permutation
+int AP1roGeneralizedSenoObjective::sign_perm(std::vector<std::size_t> jumbled_set, const std::vector<std::size_t> ordered_set, bool is_decreasing = false) {
+    if (is_decreasing) {
+        std::reverse(jumbled_set.begin(), jumbled_set.end());
+    }
+
+    // Determine the ordered set if not provided
+    std::vector<std::size_t> sorted_set = ordered_set.empty() ? jumbled_set : ordered_set;
+
+    // Ensure the ordered set is strictly increasing
+    if (!std::is_sorted(sorted_set.begin(), sorted_set.end())) {
+        throw std::invalid_argument("ordered_set must be strictly increasing.");
+    }
+
+    int sign = 1;
+
+    // Count transpositions needed to sort
+    for (size_t i = 0; i < sorted_set.size(); ++i) {
+        for (size_t j = 0; j < jumbled_set.size(); ++j) {
+            if (jumbled_set[j] > sorted_set[i]) {
+                sign *= -1;
+            } else if (jumbled_set[j] == sorted_set[i]) {
+                break;
+            }
+        }
+    }
+
+    return sign;
+}
+
+// Helper function to determine the smallest integer type to hold indices
+template <typename T>
+T AP1roGeneralizedSenoObjective::choose_dtype(size_t nparams) {
+    size_t two_power = static_cast<size_t>(std::ceil(std::log2(nparams)));
+    if (two_power <= 8) {
+        return uint8_t();
+    } else if (two_power <= 16) {
+        return uint16_t();
+    } else if (two_power <= 32) {
+        return uint32_t();
+    } else if (two_power <= 64) {
+        return uint64_t();
+    } else {
+        throw std::runtime_error("Can only support up to 2**63 number of parameters");
+    }
+}
+
+// Helper function to generate the Cartesian product of vectors
+template <typename T>
+std::vector<std::vector<std::pair<std::vector<T>, std::vector<T>>>> AP1roGeneralizedSenoObjective::cartesian_product(const std::vector<std::pair<std::vector<std::vector<T>>, std::vector<std::vector<T>>>>& v) {
+    std::vector<std::vector<std::pair<std::vector<T>, std::vector<T>>>> s = {{}};
+    for (const auto& u : v) {
+        std::vector<std::vector<std::pair<std::vector<T>, std::vector<T>>>> r;
+        for (const auto& x : s) {
+            for (const auto& y1 : u.first) {
+                for (const auto& y2 : u.second) {
+                    r.push_back(x);
+                    r.back().emplace_back(y1, y2);
+                }
+            }
+        }
+        s = std::move(r);
+    }
+    return s;
+}
+
+// Explicit instantiation of the cartesian_product template for the required type
+// template std::vector<std::vector<std::pair<std::vector<int>, std::vector<int>>>> cartesian_product(const std::vector<std::pair<std::vector<std::vector<int>>, std::vector<std::vector<int>>>>&);
+
+// std::vector<std::vector<T>> AP1roGeneralizedSenoObjective::cartesian_product(const std::vector<std::vector<T>>& v) {
+//     std::vector<std::vector<T>> result;
+//     if (v.empty()) return result;
+
+//     // Initialize result with the first vector
+//     result.push_back({});
+//     for (const auto& vec : v) {
+//         std::vector<std::vector<T>> temp;
+//         for (const auto& res : result) {
+//             for (const auto& elem : vec) {
+//                 temp.push_back(res);
+//                 temp.back().push_back(elem);
+//             }
+//         }
+//         result = std::move(temp);
+//     }
+//     return result;
+// }
+
+// Ensure the template function is instantiated for the required types
+
+// template std::vector<std::vector<std::pair<std::vector<int>, std::vector<int>>>> AP1roGeneralizedSenoObjective::cartesian_product(const std::vector<std::vector<std::pair<std::vector<int>, std::vector<int>>>>&);
+
+// template class cartesian_product<std::pair<std::vector<int>, std::vector<int>>>;
+
+
+// Function to generate all possible excitation operators
+void AP1roGeneralizedSenoObjective::generate_possible_exops(
+    const std::vector<std::size_t>& a_inds, const std::vector<std::size_t>& c_inds) 
+{
+    // std::cout << "\nGenerating possible excitation operators...\n";
+
+    // const std::unordered_set<std::vector<int>>& valid_exops;
+    std::pair<std::vector<std::size_t>, std::vector<std::size_t>> key = std::make_pair(a_inds, c_inds);
+    // std::unordered_map<std::vector<std::vector<std::size_t>>, std::size_t> inds_multi;
+    std::unordered_map<int, std::vector<std::vector<int>>> inds_multi;
+    
+    // Step 1: Get partitoins of a_inds size
+    int exrank = a_inds.size();
+    int nranks = ranks.size();
+    auto partitions = intPartitionRecursive(ranks, nranks, exrank);
+
+    for (const auto& partition : partitions) {
+        // std::cout << "Partition: ";
+        // for (int size : partition) {
+        //     std::cout << size << " ";
+        // }
+        // std::cout << "\n";
+        // Step 2: Convert partition into bin size and count format
+        std::unordered_map<int, int> reduced_partition;
+        for (int size : partition) {
+            reduced_partition[size]++;
+        }
+
+        //bin_size_num = exc_op_rank, counter of the number of that excitation operator
+        std::vector<std::pair<int, int>> bin_size_num;
+        // for (const auto& [bin_size, count] : reduced_partition) {
+        for (const auto& bins : reduced_partition) {
+            const auto& bin_size = bins.first;
+            const auto& count = bins.second;
+            bin_size_num.emplace_back(bin_size, count);
+            // std::cout << "Bin size: " << bin_size << " Count: " << count << "\n";
+        }
+
+        // Step 3: Generate all unordered partitions of a_inds and c_inds
+        // convert a_inds from size_t to int
+        std::vector<int> a_inds_int(a_inds.begin(), a_inds.end());
+        std::vector<int> c_inds_int(c_inds.begin(), c_inds.end());
+        auto annhs_partitions = generate_unordered_partitions(a_inds_int, bin_size_num);
+        auto creas_partitions = generate_unordered_partitions(c_inds_int, bin_size_num);
+
+        // std::cout << "annhs_partitions:\n";
+        // for (const auto& annhs : annhs_partitions) {
+        //     std::cout << "{ ";
+        //     for (const auto& annh : annhs) {
+        //         std::cout << "{ ";
+        //         for (int i : annh) {
+        //             std::cout << i << " ";
+        //         }
+        //         std::cout << "} ";
+        //     }
+        //     std::cout << "}\n";
+        // }
+        // std::cout << "creas_partitions:\n";
+        // for (const auto& creas : creas_partitions) {
+        //     std::cout << "{ ";
+        //     for (const auto& crea : creas) {
+        //         std::cout << "{ ";
+        //         for (int i : crea) {
+        //             std::cout << i << " ";
+        //         }
+        //         std::cout << "} ";
+        //     }
+        //     std::cout << "}\n";
+        // }
+
+
+        for (const auto& annhs : annhs_partitions) {
+            // auto annhs_grouped = group_by_size(annhs);
+            std::unordered_map<int, std::vector<std::vector<int>>> annhs_grouped;
+            for (const auto& annh : annhs) {
+                annhs_grouped[annh.size()].push_back(annh);
+            }
+
+            // std::cout << "annh_grouped:\n";
+            // for (const auto& pair : annhs_grouped) {
+            //     std::cout << pair.first << ": ";
+            //     for (const auto& vec : pair.second) {
+            //         std::cout << "{ ";
+            //         for (int i : vec) {
+            //             std::cout << i << " ";
+            //         }
+            //         std::cout << "} ";
+            //     }
+            //     std::cout << std::endl;
+            // }
+            
+            for (const auto& creas: creas_partitions) {
+                // auto creas_grouped = group_by_size(creas);
+                std::unordered_map<int, std::vector<std::vector<int>>> creas_grouped;
+                for (const auto& crea : creas) {
+                    creas_grouped[crea.size()].push_back(crea);
+                }
+
+                // std::cout << "creas_grouped:\n";
+                // for (const auto& pair : creas_grouped) {
+                //     std::cout << pair.first << ": ";
+                //     for (const auto& vec : pair.second) {
+                //         std::cout << "{ ";
+                //         for (int i : vec) {
+                //             std::cout << i << " ";
+                //         }
+                //         std::cout << "} ";
+                //     }
+                //     std::cout << std::endl;
+                // }
+
+                // Step 3: Generate permutations for each group
+                std::unordered_map<int, std::vector<std::vector<std::vector<int>>>> creas_perms;
+                // // --------------------------------------------------------------------
+                // for (const auto& pair : creas_grouped) {
+                //     std::cout << "Generating perms:\n";
+                //     std::cout << pair.second.size() << std::endl;
+                //     if (pair.first == 1 && pair.second.size() > 1) {
+                //         // Permute all bins together
+                //         std::vector<int> all_bins;
+                //         for (const auto& vec : pair.second) {
+                //             all_bins.insert(all_bins.end(), vec.begin(), vec.end());
+                //         }
+                //         auto perms = generate_perms(all_bins);
+                //         for (const auto& perm : perms) {
+                //             std::vector<std::vector<int>> split_perm;
+                //             for (size_t i = 0; i < perm.size(); i ++) {
+                //                 split_perm.push_back(std::vector<int>(perm.begin() + i, perm.begin() + i + pair.first));
+                //             }
+                //             creas_perms[pair.first].insert(creas_perms[pair.first].end(), split_perm.begin(), split_perm.end());
+                //         }
+                //     } else {
+                //         const auto& group = pair.second;
+                //         std::vector<std::vector<std::vector<int>>> perms;
+                //         if (group.size() > 1) {
+                //             // Generate permutations of the vectors in the group
+                //             std::vector<std::vector<int>> group_copy = group;
+                //             do {
+                //                 perms.push_back(group_copy);
+                //             } while (std::next_permutation(group_copy.begin(), group_copy.end()));
+                //         } else {
+                //             perms.push_back(group);
+                //         }
+                //         creas_perms[pair.first] = perms;
+                //         // for (const auto& vec : pair.second){
+                //         //     std::cout << "vec: ";
+                //         //     for (int i : vec) {
+                //         //         std::cout << i << " ";
+                //         //     }
+                //         //     std::cout << std::endl;                            
+                //         //     creas_perms[pair.first] = generate_perms(vec);
+                //         // }
+                //     }
+                    
+                // }
+                // // --------------------------------------------------------------------
+                    // const auto& size = size_group.first;
+                    // const auto& group = size_group.second;
+                    // std::vector<std::vector<int>> perms;
+                    // for (const auto& crea: group) {
+                    //     std::vector<int> temp;
+                    //     std::vector<bool> used(crea.size(), false);
+                    //     std::cout << "genreate_perms\n";
+                    //     auto current_perms = generate_perms(crea, temp, used);
+                    //     perms.insert(perms.end(), current_perms.begin(), current_perms.end());
+                    // }
+                    // creas_perms[size] = perms;
+                    // std::cout << "creas_perms:\n";
+                    // for (const auto& perm : perms) {
+                    //     std::cout << "{ ";
+                    //     for (int i : perm) {
+                    //         std::cout << i << " ";
+                    //     }
+                    //     std::cout << "}\n";
+                    // }
+                // }
+
+                for (const auto& pair : creas_grouped) {
+                    int size = pair.first;
+                    const auto& group = pair.second;
+                    std::vector<std::vector<std::vector<int>>> perms;
+
+                    // Generate permutations of the vectors in group
+                    std::vector<std::vector<int>> group_copy = group;
+                    do {
+                        perms.push_back(group_copy);
+                    } while (std::next_permutation(group_copy.begin(), group_copy.end()));
+                    creas_perms[size] = perms;
+                }
+
+                // Print creas_perms for debugging
+                // std::cout << "creas_perms:\n";
+                // for (const auto& pair : creas_perms) {
+                //     std::cout << pair.first << ": ";
+                //     for (const auto& vecs : pair.second) {
+                //         std::cout << "{ ";
+                //         for (const auto& vec : vecs) {
+                //             std::cout << "{ ";
+                //             for (int i : vec) {
+                //                 std::cout << i << " ";
+                //             }
+                //             std::cout << "} ";
+                //         }
+                //         std::cout << "} ";
+                //     }
+                //     std::cout << std::endl;
+                // }
+
+                // Combine permutations of each annihilation and creation pair (of same size)
+                // std::vector<std::vector<std::pair<std::vector<int>, std::vector<int>>>> exc_perms;
+                std::vector<std::pair<std::vector<std::vector<int>>, std::vector<std::vector<int>>>> exc_perms;
+                for (const auto& size_num : bin_size_num) {
+                    int size = size_num.first;
+                    // if (annhs_grouped.find(size) != annhs_grouped.end() && creas_perms.find(size) != creas_perms.end()) {
+                    //     if (size == 1 && size_num.second > 1) {
+                    //         /// Special case: handle permutations of entire group
+                    //         std::size_t nperms = creas_perms[size].size() / creas_grouped[size].size();
+                    //         std::cout << "nperms: " << nperms << std::endl;
+                    //         for (std::size_t i = 0; i < creas_perms[size].size(); i += nperms) {
+                    //             std::vector<std::vector<int>> split_crea;
+                    //             for (std::size_t j = 0; j < nperms ; ++j) {
+                    //                 split_crea.push_back(creas_perms[size][i + j]);
+                    //             }
+                    //             exc_perms.push_back(std::make_pair(annhs_grouped[size], split_crea));
+                    //         }
+                            
+                    //     } else {
+                    //         exc_perms.push_back(std::make_pair(annhs_grouped[size], creas_perms[size]));
+                    //     }
+                    // }
+                    if (annhs_grouped.find(size) != annhs_grouped.end() && creas_perms.find(size) != creas_perms.end()) {
+                        for (const auto& crea_perm : creas_perms[size]){
+                            exc_perms.push_back(std::make_pair(annhs_grouped[size], crea_perm));
+                        }
+                    }
+                }
+                    // auto annh_group = annhs_grouped[size];
+                    // auto crea_perm = creas_perms[size];
+                    // std::cout << "Printing crea_perm\n";
+                    // for (const auto& crea : crea_perm) {
+                    //     std::cout << "{ ";
+                    //     for (int i : crea) {
+                    //         std::cout << i << " ";
+                    //     }
+                    //     std::cout << "}\n";
+                    // }
+                    // std::vector<std::pair<std::vector<int>, std::vector<int>>> combined;
+                    // for (const auto& annh : annh_group) {
+                    //     for (const auto& crea : crea_perm) {
+
+                            // std::cout << "Annihilation: ";
+                            // for (int i : annh) {
+                            //     std::cout << i << " ";
+                            // }
+                            // std::cout << std::endl;
+                            // std::cout << "Creation: ";
+                            // for (int i : crea) {
+                            //     std::cout << i << " ";
+                            // }
+                            // std::cout << std::endl;
+                            // combined.emplace_back(std::make_pair(annh, crea));
+                           
+                            // combined.push_back(std::make_pair(annh, crea));
+                            // std::cout << "Combined: {";
+                            // for (const auto& elem : combined.back().first) {
+                            //     std::cout << elem << " ";
+                            // }
+                            // std::cout << "-->";
+                            // for (const auto& elem : combined.back().second) {
+                            //     std::cout << elem << " ";
+                            // }
+                            // std::cout << "}" << std::endl;
+                        // }
+                    // }
+                    // exc_perms.push_back(combined);
+
+                // }
+
+                // Print exc_perms
+                // std::cout << "\nexc_perms: ";
+                // for (const auto& pair : exc_perms) {
+                //     for (const auto& annh : pair.first) {
+                //         std::cout << "{ ";
+                //         for (int i : annh) {
+                //             std::cout << i << " ";
+                //         }
+                //     }
+                //     std::cout << "} -> { ";
+                //     for (const auto& crea : pair.second) {
+                //         for (int i : crea) {
+                //             std::cout << i << " ";
+                //         }
+                //         std::cout << "} ";
+                //     }    
+                //     std::cout << std::endl;
+                // }
+
+                
+                // for (const auto& excs : cartesian_product(exc_perms)) {
+                for (const auto& excs : exc_perms) {
+                    // std::cout << "***Cartesian product***\n";
+                    std::vector<std::vector<long>> combs;
+                    // std::vector<std::pair<std::vector<long>, std::vector<long>>> combs;
+                    bool is_continue = false;
+                    // for (std::size_t i = 0; i < excs.first.size(); ++i) {
+                    //     // auto [annh, crea] = exc;
+                    auto annhs = excs.first;
+                    auto creas = excs.second;
+
+                    // std::cout << "Annihilation: ";
+                    // for (const auto& vec : annhs) {
+                    //     std::cout << "{ ";
+                    //     for (int elem : vec) {
+                    //         std::cout << elem << " ";
+                    //     }
+                    //     std::cout << "} ";
+                    // }
+                    // std::cout << std::endl;
+                    // std::cout << "Creation: ";
+                    // for (const auto& vec : creas) {
+                    //     std::cout << "{ ";
+                    //     for (int elem : vec) {
+                    //         std::cout << elem << " ";
+                    //     }
+                    //     std::cout << "} ";
+                    // }
+                    // std::cout << std::endl;
+                        //(annh);
+                        // op.insert(op.end(), crea.begin(), crea.end());
+                        // op.reserve(annh.size() + crea.size()); // Reserve space for efficiency
+                        
+                        // std::vector<long> op;
+                        // for (int a : annh) {
+                        //     std::cout << "a: " << a << std::endl;
+                        //     op.push_back(static_cast<long>(a)); // Convert each element to long
+                        // }
+                        // for (int c : crea) {
+                        //     std::cout << "c: " << c << std::endl;
+                        //     op.push_back(static_cast<long>(c)); // Convert each element to long
+                        // }
+                    for (std::size_t j = 0; j < annhs.size(); ++j) {
+                        std::vector<long> annh(annhs[j].begin(), annhs[j].end()); // Convert each element to long
+                        std::vector<long> crea(creas[j].begin(), creas[j].end()); // Convert each element to long
+                        std::vector<long> op(annh.begin(), annh.end());
+                        op.insert(op.end(), crea.begin(), crea.end());
+                        // std::cout << "op: ";
+                        // for (long elem : op) {
+                        //     std::cout << elem << " ";
+                        // }
+                        // std::cout << std::endl;
+
+                        // std::cout << "Size of exops: " << exops.size() << std::endl;
+            
+                        // std::csout<< "searching for op in exops\n";
+                        // if (exops.find(op) != exops.end()) {
+                        //     std::cout << "op found in exops\n";
+                        // }
+                        // else if (exops.find(op) == exops.end()) {
+                        //     std::cout << "op not found in exops\n";
+                        //     is_continue = false;
+                        //     break;
+                        // }
+                        // try {
+                        //     combs.push_back(op);
+                        //     std::cout << "Successfully pushed op to combs" << std::endl;
+                        //     } catch (const std::exception& e) {
+                        //         std::cerr << "Exception caught during push_back: " << e.what() << std::endl;
+                        //     } catch (...) {
+                        //         std::cerr << "Unknown exception caught during push_back" << std::endl;
+                        // }
+
+                        if (std::find_if(exops.begin(), exops.end(), [&op](const auto& pair) {
+                            return pair.first == op;
+                        }) == exops.end()) {
+                            // std::cout << "op not found in exops\n";
+                            is_continue = true;
+                            break;
+                        }
+                        combs.push_back(op);
+                        
+                        // }
+                    
+                    }
+                    if (is_continue) break;
+                    // if (is_continue) continue;
+
+                    int num_hops = 0, prev_hurdles = 0;
+                    //std::cout << "combs size: " << combs.size() << std::endl;
+            
+                    std::vector<std::size_t> jumbled_a_inds, jumbled_c_inds;
+                    for (const auto& exop : combs) {
+                        size_t num_inds = exop.size() / 2;
+                        num_hops += prev_hurdles * num_inds;
+                        prev_hurdles += num_inds;
+                        
+                        //std::cout << "num_hops: " << num_hops << ", prev_hurdles: " << prev_hurdles << std::endl;
+
+                        jumbled_a_inds.insert(jumbled_a_inds.end(), exop.begin(), exop.begin() + num_inds);
+                        jumbled_c_inds.insert(jumbled_c_inds.end(), exop.begin() + num_inds, exop.end());
+                    }
+
+                    int sign = (num_hops % 2 == 0) ? 1 : -1;
+                    
+                    
+
+                    sign *= sign_perm(jumbled_a_inds, a_inds, false);
+                    sign *= sign_perm(jumbled_c_inds, c_inds, false);
+
+                    std::vector<int> inds;
+                    for (const auto& exop : combs) {
+                        inds.push_back(exops[exop]);
+                    }
+                    inds.push_back(sign);
+                    //std::cout << "Sign: " << sign << std::endl;
+
+                    
+                    //std::cout << "Inds: ";
+                    // for (int i : inds) {
+                    //     std::cout << i << " ";
+                    //     if (i > 10000) {
+                    //         std::cout << "i > 10000\n";
+                    //     }
+                    // }
+                    // std::cout << std::endl;
+
+                    if (inds_multi.find(inds.size() - 1) == inds_multi.end()) {
+                        //std::cout << "Inds_multi not found\n";
+                        inds_multi[static_cast<int>(inds.size() - 1)] = {};
+                    }
+                    inds_multi[static_cast<int>(inds.size() - 1)].push_back(inds);
+                }
+            }
+        
+        } 
+    }
+    //std::cout << "addign inds_multi exop_combs: ";
+    // exop_combs[key].push_back(inds_multi);
+    try {
+        exop_combs[key] = inds_multi;
+        //std::cout << "Successfully assigned inds_multi to exop_combs" << std::endl;
+    } catch (const std::exception& e) {
+        std::cerr << "Exception caught during exop_combs assignment: " << e.what() << std::endl;
+    } catch (...) {
+        std::cerr << "Unknown exception caught during exop_combs assignment" << std::endl;
+    }
+
+    // std::cout << "exop_combs done";
+    // Print the contents of exop_combs
+    // std::cout << "Contents of exop_combs:" << std::endl;
+    // for (const auto& pair : exop_combs) {
+    //     std::cout << "\n{";
+    //     for (const auto& elem : pair.first.first) {
+    //         std::cout << elem << ", ";
+    //     }
+    //     for (const auto& elem : pair.first.second) {
+    //         std::cout << elem << ", ";
+    //     }
+    //     std::cout << "} : ";
+    //     for (const auto& vec : pair.second) {
+    //         std::cout << "{";
+    //         const auto& rank = vec.first;
+    //         const auto& inds = vec.second;
+    //         std::cout << rank << ":" ;
+    //         for (const auto& ind : inds) {
+    //             std::cout << "{";
+    //             for (int i : ind) {
+    //                 std::cout << i << " ";
+    //             }
+    //             std::cout << "}";
+    //         }
+    //         std::cout << "}, ";
+    //     }
+    //     std::cout << std::endl;
+    // }
+
+
+
+    // Determine dtype
+    // uint64_t dtype = choose_dtype<uint64_t>(nparam);
+
+//     // Prepare reshaped and finalized inds_multi
+//     for (auto& [rank, inds] : inds_multi) {
+//         // Reshape to (-1, rank + 1) and store in uint64_t format
+//         size_t row_size = rank + 1;
+//         size_t nrows = inds.size() / row_size;
+//         std::vector<uint64_t> reshaped_inds;
+//         std::pair<std::vector<std::size_t>, std::vector<std::size_t>> key = std::make_pair(a_inds, c_inds);
+
+//         for (size_t i = 0; i < nrows; ++i) {
+//             uint64_t packed_row = 0;            
+//             for (size_t j = 0; j < row_size; ++j) {
+//                 packed_row = (packed_row << 16) | static_cast<uint64_t>(inds[i * row_size + j][0]);
+//             }
+//             reshaped_inds.push_back(packed_row);   
+//         }
+//         // Save reshaped indices in the exop_combs structure
+//         exop_combs[key][rank] = reshaped_inds;
+//     }
+}
+
+
+// std::variant<double, std::vector<double>> AP1roGeneralizedSenoObjective::product_amplitudes_multi(
+double AP1roGeneralizedSenoObjective::product_amplitudes_multi(
+    const std::unordered_map<int, std::vector<std::vector<int>>>& inds_multi,
+    bool deriv, const double* x){
+    // std::cout << "deriv: " << deriv << std::endl;
+    double output = 0.0;
+    if (deriv) {
+        throw std::invalid_argument("Derivative computation is not supported.");
+    } else {
+        // for (const auto& [exc_order, inds_sign] : inds_multi) {
+        // std::cout << "In product_amplitudes_multi\n";
+        // std::cout << "inds_multi size: " << inds_multi.size() << std::endl;
+
+        for (const auto& exc_inds : inds_multi) {
+            // std::cout << "Processing exc_inds\n";
+        
+            // std::size_t exc_order = exc_inds.first;
+            const std::vector<std::vector<int>> inds_sign = exc_inds.second;
+            
+            // std::cout << "Exc_order: " << exc_order << std::endl;
+            // std::cout << "inds_sign size: " << inds_sign.size() << std::endl;
+        
+            // for (const auto& row : inds_sign) {    
+            //     std::cout << "Row: ";
+            //     for (int i : row) {
+            //         std::cout << i << " ";
+            //     }
+            //     std::cout << std::endl;
+            // }
+
+            std::vector<std::vector<int>> indices;
+            std::vector<int> signs;
+            for (const auto& row : inds_sign) {
+
+                // std::cout << "Row: ";
+                // for (int i : row) {
+                //     std::cout << i << " ";
+                // }
+                // std::cout << std::endl;
+
+                if (row.empty()){
+                    std::cerr << "Error: row in empty\n";
+                }
+
+                
+
+                std::vector<int> ind(row.begin(), row.end() - 1);
+                indices.push_back(ind);
+                
+                // std::cout << "Indices: ";
+                // for (int i : ind) {
+                //     std::cout << i << " ";
+                // }
+                // std::cout << std::endl;
+
+                int sign = row.back();
+                signs.push_back (sign > 1 ? -1 :  sign);
+            }
+
+            for (size_t i = 0; i < indices.size(); ++i) {
+                double product = 1.0;
+                for (const auto& idx : indices[i]) {
+                    // std::cout << "x[idx]: " << x[idx] << std::endl;
+                    product *= x[idx];
+                }
+                output += product * signs[i];
+            }
+        }
+        
+    }
+    return output;
+
+    // // Derivative computation
+    // std::vector<double> output(nparam, 0.0);
+    // for (const auto& [exc_order, inds_sign] : inds_multi) {
+    //     std::vector<std::vector<int>> indices;
+    //     std::vector<int> signs;
+    //     for (const auto& row : inds_sign) {
+    //         std::vector<int> ind(row.begin(), row.end() - 1);
+    //         indices.push_back(ind);
+    //         int sign = row.back();
+    //         signs.push_back(sign > 1 ? -1 : sign);
+    //     }
+
+    //     std::unordered_set<int> unique_inds;
+    //     for (const auto& row : indices) {
+    //         unique_inds.insert(row.begin(), row.end());
+    //     }
+        
+    //     for (const auto& ind : unique_inds) {
+    //         std::vector<bool> bool_inds;
+    //         for (const auto& row : indices) {
+    //             bool_inds.push_back(std::find(row.begin(), row.end(), ind) != row.end());
+    //         }
+
+    //         std::vector<bool> row_inds;
+    //         for (const auto& bi : bool_inds) {
+    //             row_inds.push_back(bi);
+    //         }
+
+    //         std::vector<std::vector<double>> selected_params;
+    //         for (const auto& row : indices) {
+    //             std::vector<double> temp;
+    //             for (const auto& idx : row) {
+    //                 temp.push_back(x[idx]);
+    //             }
+    //             selected_params.push_back(temp);
+    //         }
+
+    //         std::vector<double> old_params;
+    //         for (size_t i = 0; i < selected_params.size(); ++i) {
+    //             if (bool_inds[i]) {
+    //                 old_params.push_back(selected_params[i][ind]);
+    //                 selected_params[i][ind] = 1.0;
+    //             }
+    //         }
+
+    //         for (size_t i = 0; i < selected_params.size(); ++i) {
+    //             if (row_inds[i]) {
+    //                 double prod = 1.0;
+    //                 for (const auto& val : selected_params[i]) {
+    //                     prod *= val;
+    //                 }
+    //                 output[ind] += prod * signs[i];
+    //             }
+    //         }
+
+    //         for (size_t i = 0; i < selected_params.size(); ++i) {
+    //             if (bool_inds[i]) {
+    //                 selected_params[i][ind] = old_params[i];
+    //             }
+    //         }
+    //     }
+    // }    
+    // return output;
+}
+
+std::vector<double> AP1roGeneralizedSenoObjective::product_ampli_multi_deriv(
+    const std::unordered_map<int, std::vector<std::vector<int>>>& indices_multi,
+    bool deriv, const double* params){
+    std::vector<double> output(nparam, 0.0);
+    if (!deriv) {
+        throw std::invalid_argument("Deriv must be enabled for this function.");
+    }
+
+    for (const auto& indices_sign : indices_multi) {
+        const auto& indices = indices_sign.second;
+        std::vector<int> signs(indices.size());
+        std::transform(indices.begin(), indices.end(), signs.begin(), [](const std::vector<int>& row) {
+            return row.back() > 1 ? -1 : row.back();
+        });
+
+        std::set<int> unique_indices;
+        for (const auto& row : indices) {
+            unique_indices.insert(row.begin(), row.end() - 1);
+        }
+
+        for (int ind : unique_indices) {
+            std::vector<bool> bool_indices(indices.size());
+            std::transform(indices.begin(), indices.end(), bool_indices.begin(), [ind](const std::vector<int>& row) {
+                return std::find(row.begin(), row.end() - 1, ind) != row.end() - 1;
+            });
+
+            std::vector<bool> row_inds(indices.size());
+            std::transform(bool_indices.begin(), bool_indices.end(), row_inds.begin(), [](bool b) { return b; });
+
+            std::vector<std::vector<double>> selected_params(indices.size());
+            for (std::size_t i = 0; i < indices.size(); ++i) {
+                selected_params[i].resize(indices[i].size() - 1);
+                for (std::size_t j = 0; j < indices[i].size() - 1; ++j) {
+                    selected_params[i][j] = params[indices[i][j]];
+                }
+            }
+
+            std::vector<double> old_params(selected_params.size());
+            for (std::size_t i = 0; i < selected_params.size(); ++i) {
+                if (bool_indices[i]) {
+                    old_params[i] = selected_params[i][0];
+                    selected_params[i][0] = 1.0;
+                }
+            }
+
+            for (std::size_t i = 0; i < row_inds.size(); ++i) {
+                if (row_inds[i]) {
+                    double prod = 1.0;
+                    for (std::size_t j = 0; j < selected_params[i].size(); ++j) {
+                        prod *= selected_params[i][j];
+                    }
+                    output[ind] += prod * signs[i];
+                }
+            }
+
+            for (std::size_t i = 0; i < selected_params.size(); ++i) {
+                if (bool_indices[i]) {
+                    selected_params[i][0] = old_params[i];
+                }
+            }
+        }
+    }
+    return output;
+}
+
+int AP1roGeneralizedSenoObjective::sign_swap(AlignedVector<ulong> sd, long pos_current, long pos_future){
+    // Return the signature of applying annihilators then creators to the Slater determinant.
+    if (pos_current < 0 || pos_future < 0) {
+        throw std::invalid_argument("The current and future positions must be positive integers.");
+    }
+
+    // if (!((sd >> pos_current) & 1)) {
+    if (!((sd[pos_current / Size<ulong>()] >> (pos_current % Size<ulong>())) & 1)) {
+        throw std::invalid_argument("Given orbital is not occupied in the Slater determinant.");
+    }
+
+    int num_trans = 0;
+    if (pos_current < pos_future) {
+        // Count the number of set bits between pos_current and pos
+        for (long i = pos_current + 1; i <= pos_future; ++i) {
+            if ((sd[i / Size<ulong>()] >> (i % Size<ulong>())) & 1) {
+                ++num_trans;
+            }
+        }
+    } else {
+        for (long i = pos_future; i < pos_current; ++i) {
+            if ((sd[i / Size<ulong>()] >> (i % Size<ulong>())) & 1) {
+                ++num_trans;
+            }
+        }
+    }
+    return (num_trans % 2 == 0) ? 1 : -1;
+
+}
+
+int AP1roGeneralizedSenoObjective::sign_excite(AlignedVector<ulong> sd,
+    const std::vector<std::size_t>& annihilators, const std::vector<std::size_t>& creators) {
+    // Return the sign of the Slater determinant after applying excitation operators.
+    int sign = 1;
+    for (std::size_t i : annihilators) {
+        // if (!((sd >> i) & 1)) {
+        if (!((sd[i / Size<ulong>()] >> (i % Size<ulong>())) & 1)) {
+            //print sd
+            std::cout << "Annihilator: " << i << std::endl;
+            for (std::size_t k = 0; k < sd.size(); ++k) {
+                std::cout << sd[k] << " ";
+            }
+            std::cout << std::endl;
+            throw std::invalid_argument("Given Slater determinant cannot be excited using the given creators and annihilators.");
+        }
+        sign *= sign_swap(sd, i, 0);
+        // sd = sd & ~(1UL << i); // annihilate ith orbital
+        sd[i / Size<ulong>()] &= ~(1UL << (i % Size<ulong>())); // annihilate ith orbital
+    }
+
+    for (std::size_t a : creators) {
+        // sd = sd | (1UL << a); // create ath orbital
+        sd[a / Size<ulong>()] |= (1UL << (a % Size<ulong>())); // create ath orbital
+        // if (sd == 0) {
+        if (!((sd[a / Size<ulong>()] >> (a % Size<ulong>())) & 1)) {
+            //print sd
+            std::cout << "Creator: " << a << std::endl;
+            for (std::size_t k = 0; k < sd.size(); ++k) {
+                std::cout << sd[k] << " ";
+            }
+            std::cout << std::endl;
+            throw std::invalid_argument("Given Slater determinant cannot be excited using the given creators and annihilators.");
+        }
+        sign *= sign_swap(sd, a, 0);
+    }
+    return sign;
+}
+
+
+void AP1roGeneralizedSenoObjective::overlap(std::size_t ndet, const double* x, double* y)
+{
+    std::cout << "-------Computing Overlap\n nbasis: " << nbasis << ", ndet: " << ndet << "\n" ;
+    // long nocc_up = nocc / 2; 
+    // long nb = nbasis / 2;
+
+    // // nparam = nocc_up * (nb - nocc_up); //paired-doubles
+    // // nparam += nocc * (nbasis - nocc); // alpha, beta singles
+
+    // // ovlp.resize(nconn);
+    // // d_ovlp.resize(nconn * nparam);
+    std::cout << "nconn: " << nconn << ", nparam: " << nparam << "\n";
+
+    AlignedVector<ulong> rdet(nword);
+    wfn_.fill_hartreefock_det(nbasis, nocc, &rdet[0]);
+    std::cout << "rdet: " ;
+    for (std::size_t k = 0; k < nword; k++) {
+        std::cout << rdet[k]; }
+    std::cout << std::endl;
+
+    if (y == nullptr) {
+        std::cerr << "Error: y is a null pointer" << std::endl;
+        // return;
+    }
+
+    det_ac_inds.resize(nconn);
+
+    for (std::size_t idet = 0; idet != ndet; ++idet) {
+    //  std::cout << "\n-----Processing idet = " << idet << "\n";
+        y[idet] = 0.0;
+        // std::cout << "Accessing det_ptr" << std::endl;
+
+        if (idet >= static_cast<std::size_t>(wfn_.ndet)) {
+            std::cerr << "Error: idet (" << idet << ") is out of bounds (ndet: " << wfn_.ndet << ")" << std::endl;
+            continue;
+        }
+
+        const ulong *det = wfn_.det_ptr(idet);
+        if (det == nullptr) {
+            std::cerr << "Error: det_ptr returned nullptr for idet = " << idet << std::endl;
+            // continue;
+        }
+
+        // std::cout << "det: " ;
+        for (std::size_t k = 0; k < nword; k++) {
+            std::cout << det[k]; }
+        std::cout << ",,";
+
+        // Check if given det is the same as the reference det
+        bool are_same = true;
+        for (std::size_t k = 0; k < nword; ++k) {
+            // std::cout << det[k] << " ";
+            if (rdet[k] != det[k]) {
+                are_same = false;
+                break;
+            }
+        }
+        // std::cout << "are_same: " << are_same << std::endl;
+
+        ulong word, hword, pword;
+        std::size_t h, p, nexc = 0;
+
+        std::vector<std::size_t> holes;
+        std::vector<std::size_t> particles;
+
+        // Container to save annhiliation and creation indices, and sign
+        DetExcParamIndx ac_info;
+        ac_info.det.resize(nword);
+        ac_info.pair_inds.resize(1); // for annhilation indices
+        ac_info.single_inds.resize(1); // for creation indices
+
+        // Default values for indices
+        ac_info.pair_inds[0] = -1;
+        ac_info.single_inds[0] = -1;
+        ac_info.sign = 1;
+
+        // Storing current det
+        std::memcpy(&ac_info.det[0], &det[0], sizeof(ulong) * nword);
+
+        // Collect holes and particles
+        for (std::size_t iword = 0; iword != nword; ++iword)
+        {
+            word = rdet[iword] ^ det[iword]; //str for excitation
+            hword = word & rdet[iword]; //str for hole
+            pword = word & det[iword]; //str for particle
+
+            while(hword){
+                h = Ctz(hword);
+                p = Ctz(pword);
+                
+                std::size_t hole_idx = h + iword * Size<ulong>();
+                std::size_t part_idx = p + iword * Size<ulong>(); // - nocc_up;
+                
+                holes.push_back(hole_idx);
+                particles.push_back(part_idx);
+
+                hword &= ~(1UL << h);
+                pword &= ~(1UL << p);
+
+                ++nexc;
+            }
+        }
+
+
+        // Sen-o processing
+        // Check  if annhiliation and creation operators are in the exop_combinations
+        std::pair<std::vector<std::size_t>, std::vector<std::size_t>> key = std::make_pair(holes, particles);
+        // std::cout << "key: " << key.first.size() << " " << key.second.size() << std::endl;
+        if (!are_same){
+            // std::cout << "holes: ";
+            // for (std::size_t i : holes) {
+            //         std::cout << i << " ";}
+            // std::cout << std::endl;
+
+            if (exop_combs.find(key) == exop_combs.end()) {
+                
+                // std::cout << "particles: ";
+                // for (std::size_t i : particles) {
+                //     std::cout << i << " ";
+                // }
+                // std::cout << std::endl;
+ 
+                std::vector<std::size_t> holes_int(holes.begin(), holes.end());
+                std::vector<std::size_t> particles_int(particles.begin(), particles.end());
+                generate_possible_exops(holes_int, particles_int);
+                // std::cout << "\nGenerated possible exops\n";
+
+                std::unordered_map<int, std::vector<std::vector<int>>> inds_multi = exop_combs[key];
+                
+                // std::cout <<"inds_multi size: " << inds_multi.size() << std::endl;
+                // std::cout << "**Inds_multi: ";
+                // for (const auto& rank_inds: inds_multi) {
+                //     const auto& rank = rank_inds.first;
+                //     const std::vector<std::vector<int>> inds = rank_inds.second;
+                //     std::cout << rank << ":" << std::endl;
+                //     for (const auto& ind : inds) {
+                //         std::cout << "{";
+                //         for (int i : ind) {
+                //             std::cout << i << " ";
+                //         }
+                //         std::cout << "}\n";
+                //     }
+                // }
+                
+
+                // Occupied indices of HF reference det
+                AlignedVector<long> occs(nocc);
+                fill_occs(nword, rdet.data(), &occs[0]);
+
+                // Processing sen-o
+                for (auto& pair : inds_multi){  
+                    auto& exc_order = pair.first;
+                    auto& inds_sign_ = pair.second;
+
+                    // std::cout << "exc_order: " << exc_order << std::endl;
+                    // std::cout << "inds_sign_ size: " << inds_sign_.size() << std::endl;
+                    
+                    // last element of inds_sign is the sign of the excitation
+                    // auto& sign_ = inds_sign_.back();
+
+                    // std::cout << "Params: ";
+                    // for (int i = 0; i < nparam; i++) {
+                    //     std::cout << x[i] << " ";
+                    // }
+
+                    std::vector<std::vector<int>> selected_rows;
+                    // for (auto it = inds_sign_.begin(); it != inds_sign_.end() - 1; ++it) {
+                    //     const auto& row = *it;                        
+                    for (const auto& row : inds_sign_) {
+                        // std::cout << "Processing Row: ";
+                        // for (int i : row) {
+                        //     std::cout << i << " ";
+                        // }
+                        // std::cout << std::endl;
+
+                        std::unordered_set<int> trash;
+                        bool skip_row = false;
+                      
+                        // for (const auto& exop_indx : row) {
+                        for (auto it = row.begin(); it != row.end() - 1; ++it) {
+                            const auto& exop_indx = *it;
+
+                            // Check if exop_indx is a valid index
+                            if (exop_indx < 0 || static_cast<std::size_t>(exop_indx) >= ind_exops.size()) {
+                                std::cout << "ind_exops.size(): " << ind_exops.size() << std::endl;
+                                std::cerr << "Error: exop_indx " << exop_indx << " is out of bounds" << std::endl;
+                                continue;
+                            }
+                            const auto& exop = ind_exops[exop_indx];
+                            
+                            // std::cout << "exop_indx: " << exop_indx << std::endl;
+                            // std::cout << "exop: ";
+                            // for (long i : exop) {
+                            //     std::cout << i << " ";
+                            // }
+                            // std::cout << std::endl;
+
+                            if (exop.size() == 2) {
+                                // std::cout << "exop size: 2\n";
+                                if (trash.find(exop[0]) != trash.end()) {
+                                    skip_row = true;
+                                    break;
+                                }
+                                if (exop[0] < static_cast<int>(nbasis / 2)) {
+                                    if (std::find(occs.begin(), occs.end(), exop[0] + nbasis / 2) == occs.end()) {
+                                        skip_row = true;
+                                        break;
+                                    } else {
+                                        trash.insert(exop[0]);
+                                        trash.insert(exop[0] + nbasis / 2);
+                                    }
+                                } else {
+                                    if (std::find(occs.begin(), occs.end(), exop[0] - nbasis / 2) == occs.end()) {
+                                        skip_row = true;
+                                        break;
+                                    } else {
+                                        trash.insert(exop[0]);
+                                        trash.insert(exop[0] - nbasis / 2);
+                                    }
+                                }
+                            } else {
+                                // std::cout << "exop size: != 2\n";
+                                for (size_t j = 0; j < exop.size() / 2; ++j) {
+                                    if (trash.find(exop[j]) != trash.end()) {
+                                        skip_row = true;
+                                        break;
+                                    } else {
+                                        trash.insert(exop[j]);
+                                    }
+                                }
+                                if (skip_row) break;
+                            }
+                        }
+                        if (!skip_row) {
+                            selected_rows.push_back(row);
+                        }
+                    }
+                    inds_multi[exc_order] = selected_rows;
+                    
+                }
+
+                // std::cout << "Inds_multi: ";
+                // for (const auto& rank_inds: inds_multi) {
+                //     const auto& rank = rank_inds.first;
+                //     const std::vector<std::vector<int>> inds = rank_inds.second;
+                //     std::cout << rank << ":" << std::endl;
+                //     for (const auto& ind : inds) {
+                //         std::cout << "{";
+                //         for (int i : ind) {
+                //             std::cout << i << " ";
+                //         }
+                //         std::cout << "}\n";
+                //     }
+                // }
+                
+
+                double amplitudes = 0.0;
+                amplitudes = product_amplitudes_multi(inds_multi, false, x);
+                // double amplitudes = std::get<double>(amplitudes_variant);
+                int sign = sign_excite(rdet, holes, particles);
+                ac_info.sign = sign;
+                y[idet] = sign * amplitudes;
+
+                // std::cout << "Amplitudes: " ;
+                std::cout << amplitudes << ",," << sign << std::endl;
+                // std::cout << "Sign: " <<;
+                
+
+
+            } else{
+                std::unordered_map<int, std::vector<std::vector<int>>> inds_multi = exop_combs[key];
+                auto amplitudes = product_amplitudes_multi(inds_multi, false, x);
+                int sign = sign_excite(rdet, holes, particles);
+                ac_info.sign = sign;
+                std::cout << amplitudes << ",," << sign << std::endl;
+                y[idet] = sign * amplitudes;
+            }
+
+            // Store the annihilation and creation indices and sign
+            if (holes.size() > 0 && particles.size() > 0) {
+                // std::cout << "Storing annihilation and creation indices\n";
+                std::vector<long> holes_long(holes.begin(), holes.end());
+                std::vector<long> particles_long(particles.begin(), particles.end());
+                ac_info.pair_inds.clear();
+                ac_info.single_inds.clear();
+                ac_info.pair_inds = holes_long;
+                ac_info.single_inds = particles_long;
+                
+                // Ensure det_ac_inds has enough space
+                ensure_struct_size(det_ac_inds, idet+1);
+                det_ac_inds[idet] = ac_info;
+                // std::cout << "storing done\n";
+            }
+
+        } else if (are_same) {
+            y[idet] = 1.0;
+            det_ac_inds[idet] = ac_info;
+        } else {
+            y[idet] = 0.0;
+            det_ac_inds[idet] = ac_info;
+        }
+        
+        
+    }
+    
+}
+
+
+
+    
+
+
+//---------------------------------------------------------------------------------------------------------------------------------------------------
+template <typename T>
+void AP1roGeneralizedSenoObjective::generate_combinations(const std::vector<T>& elems, int k, std::vector<std::vector<T>>& result, long nbasis) {
+    std::vector<bool> mask(elems.size());
+    std::fill(mask.end() - k, mask.end() + k, true);
+    do {
+        std::vector<T> combination;
+        for (std::size_t i = 0; i < elems.size(); ++i) {
+            if (mask[i]) combination.push_back(elems[i]);
+        }
+        if (k == 2) {
+            if (combination.size() == 2 && (combination[1] - combination[0] == static_cast<unsigned long> (nbasis))) {
+                result.push_back(combination);
+            }
+        } else {
+            result.push_back(combination);
+        }
+    } while (std::next_permutation(mask.begin(), mask.end()));
+}
+
+std::vector<std::pair<int, int>> AP1roGeneralizedSenoObjective::generate_partitions(int e, int max_opairs, bool nvir_pairs) {
+    std::vector<std::pair<int, int>> partitions;
+    for (int p = 0; p <= std::min(e / 2 , max_opairs); ++p) {
+        int s = e - 2 * p;
+        if (max_opairs > 0 && nvir_pairs && p) {
+            if (e % 2 !=0) { // if e is odd
+                partitions.emplace_back(p, s); 
+            }
+            else if (e %2 == 0 && s <= (max_opairs - p)) {
+                partitions.emplace_back(p, s);
+            }
+        } else if (max_opairs == 0) {
+            // if (e % 2 ==0 && s <= max_opairs) {
+            partitions.emplace_back(0, s);
+            // } else if 
+        
+        }
+    }
+    return partitions;
+}
+
+void AP1roGeneralizedSenoObjective::generate_excitations(const std::vector<std::size_t>& holes,
+    const std::vector<std::size_t>& particles, int excitation_order, std::vector<long>& pair_inds,
+    std::vector<long>& single_inds, long nbasis, const NonSingletCI &wfn_) {
+
+    AlignedVector<std::pair<int,int>> occ_pairs, vir_pairs;
+    AlignedVector<long> occs_up, occs_dn, virs_up, virs_dn; // , temp_occs;
+    for (int i : holes) {
+        (i < nbasis ? occs_up : occs_dn).push_back(i);
+    }
+    for (int a : particles) {
+        (a < nbasis ? virs_up : virs_dn).push_back(a);
+    }
+
+    // Create an unordered set for fast lookup of occupied down-orbitals
+    std::unordered_set<int> occ_dn_set(occs_dn.begin(), occs_dn.end()); 
+    std::unordered_set<int> virs_set(particles.begin(), particles.end());    
+
+    // Generate occ_pairs
+    for (long i : occs_up) {
+        if (occ_dn_set.find(i + nbasis) != occ_dn_set.end()) {
+            occ_pairs.push_back({i, i + nbasis});   
+            // temp_occs.push_back(i);
+            // temp_occs.push_back(i + nbasis);
+        }
+    }
+    // Generate vir_pairs
+    for (long a : particles) {
+        if (virs_set.find(a + nbasis) != virs_set.end()) {
+            vir_pairs.push_back({a, a + nbasis});   
+        }
+    }
+
+    std::vector<std::pair<int,int>>::size_type max_pairs = occ_pairs.size();
+    bool nvir_pairs = false;
+    if (max_pairs == vir_pairs.size()) {
+        nvir_pairs = true;
+    }
+    
+    auto partitions = generate_partitions(excitation_order, max_pairs, nvir_pairs);
+
+    for (const auto& pair : partitions) {
+        const auto& num_pairs = pair.first;
+        const auto& num_singles = pair.second; 
+        // Step 2: Generate combinations of pairs and singles
+        std::vector<std::vector<std::size_t>> hole_pairs, hole_singles;
+        std::vector<std::vector<std::size_t>> part_pairs, part_singles;
+
+        // Iterate over all unique combintaions of pairs and singles
+        std::vector<std::size_t> used_holes, used_parts;
+
+        if (num_pairs > 0) {
+            generate_combinations(holes, 2, hole_pairs, nbasis);
+            generate_combinations(particles, 2, part_pairs, nbasis);
+            
+            for (const auto& hpair_comb : hole_pairs) {
+                for (const auto& ppair_comb : part_pairs){
+                    if (!hpair_comb.empty() || !ppair_comb.empty()) {
+                        long pindx = wfn_.calc_pindex(hpair_comb[0], ppair_comb[0]);
+
+                        // Clear the default values
+                        if (pair_inds[0] == -1) {
+                            pair_inds.clear();
+                        }
+                        pair_inds.push_back(pindx);
+                        used_holes.insert(used_holes.end(), hpair_comb.begin(), hpair_comb.end());
+                        used_parts.insert(used_parts.end(), ppair_comb.begin(), ppair_comb.end());
+                    }
+                }
+            }
+        }
+
+        // Now handle single excitations
+        if (num_singles  > 0) {
+            // Filter holes and particles to exclude used ones
+            std::vector<std::size_t> remaining_holes, remaining_particles;
+
+            // Exclude used holes
+            std::copy_if(holes.begin(), holes.end(), std::back_inserter(remaining_holes),
+                        [&](std::size_t h) { return std::find(used_holes.begin(), used_holes.end(), h) == used_holes.end(); });
+
+            // Exclude used particles
+            std::copy_if(particles.begin(), particles.end(), std::back_inserter(remaining_particles),
+                        [&](std::size_t p) { return std::find(used_parts.begin(), used_parts.end(), p) == used_parts.end(); });
+
+            generate_combinations(remaining_holes, 1, hole_singles, nbasis);
+            generate_combinations(remaining_particles, 1, part_singles, nbasis);
+
+            for (const auto& hsingle_comb : hole_singles) {
+                for (const auto& psingle_comb : part_singles) {
+                    if (std::find(used_holes.begin(), used_holes.end(), hsingle_comb[0]) == used_holes.end() &&
+                        std::find(used_parts.begin(), used_parts.end(), psingle_comb[0]) == used_parts.end()) {
+                        // Calculate the index of the single excitation
+                        long sindx = wfn_.calc_sindex(hsingle_comb[0], psingle_comb[0]);
+                        // Clear the default values
+                        if (single_inds[0] == -1) {
+                            single_inds.clear();
+                        }
+                        single_inds.push_back(sindx);
+                    }
+                }
+            }
+        }
+    
+    }
+}
+
+
+
+
+
+void AP1roGeneralizedSenoObjective::init_overlap(const NonSingletCI &wfn_)
+{
+    long nocc_up = wfn_.nocc / 2; 
+    long nbasis = wfn_.nbasis / 2;
+
+    nparam = nocc_up * (nbasis - nocc_up); //paired-doubles
+    nparam += wfn_.nocc * (wfn_.nbasis - wfn_.nocc); // beta singles
+
+    // ovlp.resize(nconn);
+    // d_ovlp.resize(nconn * nparam);
+    det_exc_param_indx.resize(nconn);
+
+    // std::size_t nword = (ulong)wfn_.nword;
+    long nb = wfn_.nbasis;
+    long nocc = wfn_.nocc;
+
+    for (std::size_t idet = 0; idet != nconn; ++idet)
+    {
+        AlignedVector<ulong> rdet(nword);
+        wfn_.fill_hartreefock_det(nb, nocc, &rdet[0]);
+        const ulong *det = wfn_.det_ptr(idet);
+        ensure_struct_size(det_exc_param_indx, idet+1);
+
+        // Check if given det is the same as the reference det
+        bool are_same = true;
+        for (std::size_t k = 0; k < nword; ++k) {
+            // std::cout << det[k] << " ";
+            if (rdet[k] != det[k]) {
+                are_same = false;
+                break;
+            }
+        }
+        
+        ulong word, hword, pword;
+        std::size_t h, p, nexc = 0;
+
+        std::vector<std::size_t> holes;
+        std::vector<std::size_t> particles;
+
+        // Collect holes and particles
+        for (std::size_t  iword = 0; iword != nword; ++iword)
+        {
+            word = rdet[iword] ^ det[iword]; //str for excitation
+            hword = word & rdet[iword]; //str for hole
+            pword = word & det[iword]; //str for particle
+
+            while(hword){
+                h = Ctz(hword);
+                p = Ctz(pword);
+                
+                std::size_t hole_idx = h + iword * Size<ulong>();
+                std::size_t part_idx = p + iword * Size<ulong>(); // - nocc_up;
+                
+                holes.push_back(hole_idx);
+                particles.push_back(part_idx);
+
+                hword &= ~(1UL << h);
+                pword &= ~(1UL << p);
+
+                ++nexc;
+            }
+        }
+        // New container for the excitation information
+        DetExcParamIndx exc_info;
+        exc_info.det.resize(nword);
+        exc_info.pair_inds.resize(1);
+        exc_info.single_inds.resize(1);
+
+        // Default values for indices
+        exc_info.pair_inds[0] = -1;
+        exc_info.single_inds[0] = -1;
+
+        std::memcpy(&exc_info.det[0], &det[0], sizeof(ulong) * nword);
+
+        // Generate excitations if the given det is not the same as the reference det
+        if (!are_same) generate_excitations(holes, particles, nexc, exc_info.pair_inds, exc_info.single_inds, nbasis, wfn_);
+        
+        // Sort the indices for single and pair excitations
+        std::sort(exc_info.pair_inds.begin(), exc_info.pair_inds.end());
+        std::sort(exc_info.single_inds.begin(), exc_info.single_inds.end());
+
+        // Store the sign of the overall excitation
+        int sign = 1;
+        if (exc_info.pair_inds[0] != -1 || exc_info.single_inds[0] != -1) {
+            sign = sign_excite(rdet, holes, particles);
+        }
+        exc_info.sign = sign;
+        
+        // store the excitation information
+        det_exc_param_indx[idet] = exc_info;
+    }
+}
+
+
+bool AP1roGeneralizedSenoObjective::permanent_calculation(const std::vector<long>& excitation_inds, const double* x, double& permanent) {
+    // Ryser's Algorithm
+    std::size_t n = static_cast<std::size_t>(std::sqrt(excitation_inds.size()));
+    if (n == 0) {permanent = 1.0; return true;}
+    if (n == 1) {
+        permanent = x[excitation_inds[0]]; 
+        return true;
+    }
+    permanent = 0.0;
+    std::size_t subset_count = 1UL << n; // 2^n subsets
+    for (std::size_t subset = 0; subset < subset_count; ++subset) {
+        double rowsumprod = 1.0;
+
+        for (std::size_t  i = 0; i < n; ++i) {
+            double rowsum = 0.0;
+            for (std::size_t j = 0; j < n; ++j) {
+                if (subset & (1UL << j)) {
+                    // std::cout << "\n rowsum: " << rowsum << std::endl;
+                    // std::cout << "excitation_inds[i * n + j]: " << excitation_inds[i * n + j] << std::endl;
+                    // std::cout << "x[excitation_inds[i * n + j]]: " << x[excitation_inds[i * n + j]] << std::endl;
+                    rowsum += x[excitation_inds[i * n + j]];
+                    // std::cout << "rowsum: " << rowsum << std::endl;
+                }
+            }
+            if (std::isnan(rowsum) || std::isinf(rowsum)) {
+                std::cerr << "Error: rowsum is invalid (NaN or Inf) at subset " << subset << std::endl;
+                return false;    
+            }
+            rowsumprod *= rowsum;
+        }
+        if (std::isnan(rowsumprod) || std::isinf(rowsumprod)) {
+            std::cerr << "Error: rowsumprod is invalid (NaN or Inf) at subset " << subset << std::endl;
+            return false;
+        }
+
+        // multiply by the parity of the subset
+        permanent += rowsumprod * (1 - ((__builtin_popcount(subset) & 1) << 1));
+    }
+    // If n (matrix size) is odd, multiply by -1
+    permanent *= ((n % 2 == 1) ? -1 : 1);
+    
+    if (std::isnan(permanent) || std::isinf(permanent)) {
+        std::cerr << "Error: permanent is invalid (NaN or Inf)" << std::endl;
+        return false;
+    }
+    return true;
+}
+
+double AP1roGeneralizedSenoObjective::compute_derivative(const std::vector<long> excitation_inds, 
+    const double* x, std::size_t iparam) {
+
+    // Check if excitation_inds is empty or first element is -1
+    if (excitation_inds.empty() || excitation_inds[0] == -1) {return 0.0;}
+
+    // Check if iparam is within excitation_inds
+    auto it = std::lower_bound(excitation_inds.begin(), excitation_inds.end(), iparam);
+    if (it == excitation_inds.end()) {return 0.0;}
+
+
+    std::size_t n = static_cast<std::size_t>(std::sqrt(excitation_inds.size()));
+    if (n == 0) {return 0.0;}
+    if (n == 1) {
+        return (excitation_inds[0] == static_cast<double>(iparam)) ? 1.0 : 0.0;
+    }
+
+    double derivative = 0.0;
+    std::size_t subset_count = 1UL << n; // 2^n subsets
+    //Find the position of iparam in excitation_inds
+    std::size_t pos = std::distance(excitation_inds.begin(), it);
+    // Derive i and j from pos of iparam
+    std::size_t i = pos / n;
+    std::size_t j = pos % n;
+
+    for (std::size_t subset = 0; subset < subset_count; ++subset) {
+        // skip subsets where column j is not included
+        if (!(subset & (1UL << j))) continue;
+
+        double rowsumprod = 1.0;
+
+        // Compute product of row sums for rows k != i
+        for (std::size_t k = 0; k < n; ++k) {
+            if (k == i) continue;
+
+            double rowsum = 0.0;
+            
+            for (std::size_t l = 0; l < n; ++l) {
+                if (subset & (1UL << l)) {
+                    rowsum += x[excitation_inds[k * n + l]];                    
+                }
+            }            
+            rowsumprod *= rowsum;
+        }
+        // Parity adjustment
+        int subset_parity = (__builtin_popcount(subset) % 2 == 0) ? 1 : -1;
+        derivative += rowsumprod * subset_parity;
+    }
+    // Final adjustment for the parity of the matrix
+    derivative *= ((n % 2 == 1) ? -1 : 1);
+
+    if (std::isnan(derivative) || std::isinf(derivative)) {
+        std::cerr << "Error: permanent is invalid (NaN or Inf)" << std::endl;
+    }
+    return derivative;
+
+}
+
+void AP1roGeneralizedSenoObjective::d_overlap(std::size_t ndet, const double* x, double* y) {
+    std::cout << "-------Computing Derivative\n nbasis: " << nbasis << ", ndet: " << ndet << "\n" ;
+    for (std::size_t idet = 0; idet< ndet; idet++) {
+        // Ensure we have annhiliation and creation indices for this determinant
+        if (idet < det_ac_inds.size()) {
+            std::cout << "\n**Processing idet: " << idet ;
+            const DetExcParamIndx ac_info = det_ac_inds[idet];
+            
+            std::cout << " " ;
+            for (std::size_t k = 0; k < nword; k++) {
+                std::cout << ac_info.det[k]; }
+            std::cout << std::endl;
+
+            // if reference determinant, set the derivative to zero
+            if (ac_info.pair_inds[0] == -1 && ac_info.single_inds[0] == -1) {
+                std::cout << "Processing reference determinant\n";
+                for (std::size_t iparam = 0; iparam < nparam; ++iparam) {
+                    y[ndet * iparam + idet] = 0.0;
+                }
+                continue;
+            } else {
+                std::vector<std::size_t> a_inds(ac_info.pair_inds.begin(), ac_info.pair_inds.end()); //annihilation indices
+                std::vector<std::size_t> c_inds(ac_info.single_inds.begin(), ac_info.single_inds.end()); //creation indices
+                std::pair<std::vector<std::size_t>, std::vector<std::size_t>> key = std::make_pair(a_inds, c_inds);
+
+                // combined_inds.insert(combined_inds.end(), c_inds.begin(), c_inds.end());
+                if (exop_combs.find(key) == exop_combs.end()) {
+                    generate_possible_exops(a_inds, c_inds);
+                }
+
+                auto& inds_multi = exop_combs[key];
+                AlignedVector<ulong> det = ac_info.det;
+                const int sign = ac_info.sign;
+                std::vector<long> occs(nocc);
+                fill_occs(nword, det.data(), &occs[0]);
+
+                for (auto& pair : inds_multi) {
+                    auto& indices_sign = pair.second;
+                    std::vector<std::vector<int>> selected_rows;
+
+                    // std::cout << "exc_order: " << pair.first << std::endl;
+
+                    for (const auto& row : indices_sign) {
+                        std::cout << "Row: ";
+                        std::cout << "[";
+                        for (int i : row) {
+                            std::cout << i << " ";
+                        }
+                        std::cout << "]" << std::endl;
+
+                        std::unordered_set<int> trash;
+                        bool skip_row = false;
+                      
+                        // for (const auto& exop_indx : row) {
+                        for (auto it = row.begin(); it != row.end() - 1; ++it) {
+                            const auto& exop_index = *it;
+
+                            // Check if exop_indx is a valid index
+                            if (exop_index < 0 || static_cast<std::size_t>(exop_index) >= ind_exops.size()) {
+                                std::cout << "ind_exops.size(): " << ind_exops.size() << std::endl;
+                                std::cerr << "Error: exop_index " << exop_index << " is out of bounds" << std::endl;
+                                continue;
+                            }
+                            const auto& exop = ind_exops[exop_index];
+
+                            // std::cout << "exop_index: " << exop_index << std::endl;
+                            // std::cout << "exop: ";
+                            // for (long i : exop) {
+                            //     std::cout << i << " ";
+                            // }
+                            // std::cout << std::endl;
+
+                            if (exop.size() == 2) {
+                                if (trash.find(exop[0]) != trash.end()) {
+                                    // std::cout << "exop[0] already in trash\n";
+                                    skip_row = true;
+                                    break;
+                                }
+                                if (exop[0] < static_cast<int>(nbasis / 2)) {
+                                    if (std::find(occs.begin(),  occs.end(), exop[0] + static_cast<int>(nbasis / 2)) == occs.end()) {
+                                        skip_row = true;
+                                        // std::cout << "exop[0] + nbasis / 2 not in occs\n";
+                                        break;
+                                    } else {
+                                        // std::cout << "exop[0] + nbasis / 2 in occs\n";
+                                        trash.insert(exop[0]);
+                                        trash.insert(exop[0] + static_cast<int>(nbasis / 2));
+                                    }
+                                } else {
+                                    if (std::find(occs.begin(), occs.end(), exop[0] - static_cast<int>(nbasis / 2)) == occs.end()) {
+                                        skip_row = true;
+                                        // std::cout << "exop[0] - nbasis / 2 not in occs\n";
+                                        break;
+                                    } else {
+                                        // std::cout << "exop[0] - nbasis / 2 in occ\n";
+                                        trash.insert(exop[0]);
+                                        trash.insert(exop[0] - static_cast<int>(nbasis / 2));
+                                    }
+                                }
+                            } else {
+                                for (std::size_t j = 0; j < exop.size() / 2; ++j) {
+                                    if (trash.find(exop[j]) != trash.end()) {
+                                        std::cout << "exop[j] already in trash\n";
+                                        skip_row = true;
+                                        break;
+                                    } else {
+                                        trash.insert(exop[j]);
+                                    }
+                                }
+                                if (skip_row) break;
+                            }
+                        }
+                        if (!skip_row) {
+                            selected_rows.push_back(row);
+                        }
+                    }
+                    
+                    std::cout << "Selected: ";
+                    for (const auto& row : selected_rows) {
+                        std::cout << "[";
+                        for (int i : row) {
+                            std::cout << i << " ";
+                        }
+                        std::cout << "]" << std::endl;
+                    }
+                    std::cout << std::endl;
+
+                    indices_sign = selected_rows;
+                    inds_multi[pair.first] = indices_sign;
+                }
+                std::vector<double> damplitudes = product_ampli_multi_deriv(inds_multi, true, x);
+                for (std::size_t iparam = 0; iparam < nparam; ++iparam) {
+                    y[ndet * iparam + idet] = sign * damplitudes[iparam];
+                    if (damplitudes[iparam] != 0.0) {
+                        std::cout <<  "iparam: " << iparam << ", dampli: " << damplitudes[iparam] << ", sign: " << sign << std::endl;
+                    }
+                }  
+
+            }
+        }
+    }
+}
+
+
+//-------------------------------------FIRST APPROACH BEGIN-------------------------------------------------------------------------
+// void AP1roGeneralizedSenoObjective::overlap(std::size_t ndet, const double *x, double *y) {
+//     p_permanent.resize(nconn);
+//     s_permanent.resize(nconn);
+//     //print x
+//     for (std::size_t i = 0; i < nparam; ++i) {
+//         std::cout << x[i] << " ";
+//     }
+//     std::cout << "\n" << std::endl;
+
+
+//     for (std::size_t idet = 0; idet != ndet; ++idet) {
+    
+//         if (idet < det_exc_param_indx.size()) {
+//             // Access the excitation parameter indices
+//             const DetExcParamIndx& exc_info = det_exc_param_indx[idet];
+//             double pair_permanent = 1.0;
+//             double single_permanent = 1.0;
+
+//             if (exc_info.pair_inds[0] != -1) {
+//                 if (!permanent_calculation(exc_info.pair_inds, x, pair_permanent)) {
+//                     std::cerr << "Error calculating pair_permanent for idet" << idet << "\n" << std::endl;
+//                 }
+//             }
+            
+//             if (exc_info.single_inds[0] != -1) {
+//                 if (!permanent_calculation(exc_info.single_inds, x, single_permanent)) {
+//                     std::cerr << "Error calculating single_permanent for idet " << idet << "\n" << std::endl;
+//                 }
+//             }
+//             // If given det is not allowed in the wfn, set the permanents to zero
+//             // idet = 0 is the reference HF determinant 
+//             if (exc_info.pair_inds[0] == -1 && exc_info.single_inds[0] == -1 && idet != 0) {
+//                 pair_permanent = 0.0;
+//                 single_permanent = 0.0;
+//             }
+
+//             p_permanent[idet] = pair_permanent;
+//             s_permanent[idet] = single_permanent;
+
+
+//             if (y != nullptr && idet < ndet) {
+//                 y[idet] = exc_info.sign * pair_permanent * single_permanent;
+//                 if (idet < 225) {
+//                     std::cout << idet << " " << exc_info.det[0] << " " << y[idet] << " " << exc_info.sign << std::endl;
+//                 }
+//             }else {
+//                 std::cerr << "y is nullptr or idet is out of bounds" << std::endl;
+//             }
+//         } else {
+//             std::cout << "idet" <<  idet << " not found in storage" << std::endl;
+//             y[idet] = 0.0;
+//             s_permanent[idet] = 0.0;
+//             p_permanent[idet] = 0.0;
+//         }
+//     }
+// }
+
+
+// void AP1roGeneralizedSenoObjective::d_overlap(const size_t ndet, const double *x, double *y){
+
+//     for (std::size_t idet = 0; idet != ndet; ++idet)   {
+//         // Ensure we have the excitation parameters for this determinant
+//         if (idet < det_exc_param_indx.size()) {
+//             const DetExcParamIndx exc_info = det_exc_param_indx[idet];
+//             double pair_permanent = 1.0;
+//             double single_permanent = 1.0;
+//             if (idet < s_permanent.size() && idet < p_permanent.size()) {
+//                 pair_permanent = p_permanent[idet];
+//                 single_permanent = s_permanent[idet];
+//             }
+//             // else {
+//             //     if (exc_info.pair_inds[0] != -1) {
+//             //         if (!permanent_calculation(exc_info.pair_inds, x, pair_permanent)) {
+//             //             std::cerr << "Error calculating pair_permanent for idet" << idet << std::endl;
+//             //         }
+//             //     }
+
+//             //     if (exc_info.single_inds[0] != -1) {
+//             //         if (!permanent_calculation(exc_info.single_inds, x, single_permanent)) {
+//             //             std::cerr << "Error calculating single_permanent for idet " << idet << std::endl;
+//             //         }
+//             //     }
+                
+//             // }
+            
+//             for (std::size_t iparam = 0; iparam < nparam; ++iparam) {
+//                 double dpair = 0.0;
+//                 double dsingle = 0.0;
+//                 if (exc_info.single_inds[0] != -1) {
+//                     dsingle = compute_derivative(exc_info.single_inds, x, iparam);
+//                 }
+                
+//                 if (exc_info.pair_inds[0] != -1) {
+//                     dpair = compute_derivative(exc_info.pair_inds, x, iparam);
+//                 }
+//                 // std::cout << "\nidet: " << idet << ", det: " << exc_info.det[0] << std::endl;
+//                 // std::cout << "single_inds: " << exc_info.single_inds[0] << ", pair_inds: " << exc_info.pair_inds[0] << std::endl;
+//                 // std::cout << "single_permanent: " << single_permanent << ", pair_permanent: " << pair_permanent << std::endl;
+//                 // std::cout <<  "wrt iparam: " << iparam << ", dpair: " << dpair << ", dsingle: " << dsingle << std::endl;
+//                 // std::cout << "idet: " << idet << " deriv: " << dpair * single_permanent + dsingle * pair_permanent << std::endl;
+//                 y[ndet * iparam + idet] = dpair * single_permanent + dsingle * pair_permanent;
+//             }
+//         }
+//         else {
+//             for (std::size_t iparam = 0; iparam < nparam; ++iparam) {
+//                 y[ndet * iparam + idet] = 0.0;
+//             }
+//         }
+//     }
+// }
+//-------------------------------------FIRST APPROACH END-------------------------------------------------------------------------
+
+
+} // namespace pyci
diff --git a/pyci/src/binding.cpp b/pyci/src/binding.cpp
index 6be94ce..ede9c45 100644
--- a/pyci/src/binding.cpp
+++ b/pyci/src/binding.cpp
@@ -127,6 +127,26 @@ two_mo : numpy.ndarray
 
 )""");
 
+secondquant_op.def_readonly("one_ao", &SQuantOp::one_ao_array, R"""(
+One-particle atomic integral array.
+
+Returns
+-------
+one_mo : numpy.ndarray
+    One-particle atomic integral array.
+
+)""");
+
+secondquant_op.def_readonly("two_ao", &SQuantOp::two_ao_array, R"""(
+Two-particle atomic integral array.
+
+Returns
+-------
+two_mo : numpy.ndarray
+    Two-particle atomic integral array.
+
+)""");
+
 secondquant_op.def_readonly("h", &SQuantOp::h_array, R"""(
 Seniority-zero one-particle molecular integral array.
 
@@ -877,6 +897,90 @@ genci_wfn.def(py::init<const long, const long, const long, const Array<ulong>>()
 genci_wfn.def(py::init<const long, const long, const long, const Array<long>>(), py::arg("nbasis"),
               py::arg("nocc_up"), py::arg("nocc_dn"), py::arg("array"));
 
+/*
+Section: NonSingletCI wave function class
+*/
+
+py::class_<NonSingletCI, GenCIWfn> nonsingletci_wfn(m, "nonsingletci_wfn");
+
+nonsingletci_wfn.doc() = R"""(
+Non-singlet CI wave function base class.
+)""";
+
+nonsingletci_wfn.def(py::init<const DOCIWfn &>(), R"""(
+Initialize a Non-singlet CI wave function.
+
+Parameters
+----------
+wfn : (pyci.doci_wfn | pyci.fullci_wfn | pyci.genci_wfn | pyci.nonsingletci_wfn)
+    Wave function from which to copy data.
+
+or
+
+Parameters
+----------
+filename : TextIO
+    Filename of binary file from which to load wave function.
+
+or
+
+Parameters
+----------
+nbasis : int
+    Number of spatial orbital functions.
+nocc_up : int
+    Number of occupied spin-up orbitals.
+nocc_dn : int
+    Number of occupied spin-down orbitals.
+
+)""",
+              py::arg("wfn"));
+
+nonsingletci_wfn.def(py::init<const FullCIWfn &>(), py::arg("wfn"));
+
+nonsingletci_wfn.def(py::init<const NonSingletCI &>(), py::arg("wfn"));
+
+nonsingletci_wfn.def(py::init<const std::string &>(), py::arg("filename"));
+
+nonsingletci_wfn.def(py::init<const long, const long, const long>(), py::arg("nbasis"), py::arg("nocc_up"),
+              py::arg("nocc_dn"));
+
+nonsingletci_wfn.def(py::init<const long, const long, const long, const Array<ulong>>(), py::arg("nbasis"),
+              py::arg("nocc_up"), py::arg("nocc_dn"), py::arg("array"));
+
+nonsingletci_wfn.def(py::init<const long, const long, const long, const Array<long>>(), py::arg("nbasis"),
+              py::arg("nocc_up"), py::arg("nocc_dn"), py::arg("array"));
+
+nonsingletci_wfn.def("fill_hartreefock_det", &NonSingletCI::fill_hartreefock_det, R"""(
+Fill the Hartree-Fock determinant in the given ptr. 
+
+Parameters
+----------
+nb2 : int 
+    Number of basis functions for total number of spin orbitals.
+    Because of the way GenCI handles bit strings, it will be 
+    nonsingletci_wfn.nbasis =  ham.nbasis * 2.
+nocc : int
+    Total number of occupied orbitals.
+det : array
+    Pointer to the array where the Hartree-Fock determinant will be stored. 
+)""",
+              py::arg("nb2"), py::arg("nocc"), py::arg("det"));
+
+nonsingletci_wfn.def("add_excited_dets", &NonSingletCI::py_add_excited_dets, R"""(
+Add excited determinants to the wave function.
+
+Parameters
+----------
+exc : int
+    Excitation order.
+ref : numpy.ndarray, default=None
+    Reference determinant. Default is the Hartree-Fock determinant.
+
+)""",
+              py::arg("exc"), py::arg("ref") = py::none());
+
+
 /*
 Section: Sparse CI matrix operator class
 */
@@ -929,6 +1033,59 @@ ncol : int
 
 )""");
 
+sparse_op.def_property_readonly("dtype", &SparseOp::dtype, R"""(
+/*
+Section: Sparse CI matrix operator class
+*/
+
+py::class_<SparseOp> sparse_op(m, "sparse_op");
+
+sparse_op.doc() = R"""(
+Sparse matrix operator class.
+)""");
+
+sparse_op.def_readonly("ecore", &SparseOp::ecore, R"""(
+Constant (or "zero-particle") integral.
+
+Returns
+-------
+ecore : float
+    Constant (or "zero-particle") integral.
+
+)""");
+
+sparse_op.def_readonly("symmetric", &SparseOp::symmetric, R"""(
+Whether the sparse matrix operator is symmetric/Hermitian.
+
+Returns
+-------
+symmetric : bool
+    Whether the sparse matrix operator is symmetric/Hermitian.
+
+)""");
+
+sparse_op.def_readonly("size", &SparseOp::size, R"""(
+Number of non-zero matrix elements.
+
+Returns
+-------
+size : int
+    Number of non-zero matrix elements.
+
+)""");
+
+sparse_op.def_readonly("shape", &SparseOp::shape, R"""(
+Shape of the matrix.
+
+Returns
+-------
+nrow : int
+    Number of rows.
+ncol : int
+    Number of columns.
+
+)""");
+
 sparse_op.def_property_readonly("dtype", &SparseOp::dtype, R"""(
 Data type of matrix.
 
@@ -966,6 +1123,9 @@ sparse_op.def(py::init<const SQuantOp &, const FullCIWfn &, const long, const lo
 sparse_op.def(py::init<const SQuantOp &, const GenCIWfn &, const long, const long, const bool>(),
               py::arg("ham"), py::arg("wfn"), py::arg("nrow") = -1, py::arg("ncol") = -1,
               py::arg("symmetric") = true);
+sparse_op.def(py::init<const SQuantOp &, const NonSingletCI &, const long, const long, const bool, const std::string>(),
+              py::arg("ham"), py::arg("wfn"), py::arg("nrow") = -1, py::arg("ncol") = -1,
+              py::arg("symmetric") = true, py::arg("wfntype") = "nonsingletci");
 
 sparse_op.def("update", &SparseOp::py_update<DOCIWfn>, R"""(
 Update a sparse matrix operator for the HCI algorithm.
@@ -990,6 +1150,8 @@ sparse_op.def("update", &SparseOp::py_update<FullCIWfn>, py::arg("ham"), py::arg
 
 sparse_op.def("update", &SparseOp::py_update<GenCIWfn>, py::arg("ham"), py::arg("wfn"));
 
+sparse_op.def("update", &SparseOp::py_update<NonSingletCI>, py::arg("ham"), py::arg("wfn")); //, py::arg("rows"));
+
 sparse_op.def("__call__", &SparseOp::py_matvec, R"""(
 Compute the matrix vector product of the sparse matrix operator with vector ``x``.
 
@@ -1400,6 +1562,25 @@ genci_objective.def("objective", &Objective<GenCIWfn>::py_objective, DOCSTRING_O
 genci_objective.def("jacobian", &Objective<GenCIWfn>::py_jacobian, DOCSTRING_JACOBIAN,
     py::arg("op"), py::arg("x"));
 
+
+py::class_<Objective<NonSingletCI>> nonsingletci_objective(m, "NonSingletCIObjective");
+
+nonsingletci_objective.doc() = R"""(
+NonSingletCI-FanCI objective class.
+)""";
+
+nonsingletci_objective.def("overlap", &Objective<NonSingletCI>::py_overlap, DOCSTRING_OVERLAP,
+    py::arg("x"));
+
+nonsingletci_objective.def("d_overlap", &Objective<NonSingletCI>::py_d_overlap, DOCSTRING_D_OVERLAP,
+    py::arg("x"));
+
+nonsingletci_objective.def("objective", &Objective<NonSingletCI>::py_objective, DOCSTRING_OBJECTIVE,
+    py::arg("op"), py::arg("x"));
+
+nonsingletci_objective.def("jacobian", &Objective<NonSingletCI>::py_jacobian, DOCSTRING_JACOBIAN,
+    py::arg("op"), py::arg("x"));
+
 py::class_<AP1roGObjective, Objective<DOCIWfn>> ap1rog_objective(m, "AP1roGObjective");
 
 ap1rog_objective.doc() = R"""(
@@ -1460,4 +1641,37 @@ param_cons : np.ndarray(Np, dtype=pyci.c_double)
     py::arg("op"), py::arg("wfn"), py::arg("idx_det_cons") = py::none(), py::arg("det_cons") = py::none(),
     py::arg("idx_param_cons") = py::none(), py::arg("param_cons") = py::none());
 
+// Declare Python class based on AP1roGeneralizedSenoObjective
+py::class_<AP1roGeneralizedSenoObjective, Objective<NonSingletCI>> ap1rogen_objective(m, "AP1roGeneralizedSenoObjective");
+
+// Class documentation
+ap1rogen_objective.doc() = R"""(
+AP1roGSDGeneralized_sen-o objective class.
+)""";
+
+// Initializer (keep in mind the Wfn argument should be the right class)
+ap1rogen_objective.def(py::init<const SparseOp &, const NonSingletCI &, const py::object, const py::object, const py::object, const py::object>(),
+R"""(
+Initialize the AP1roGSDGeneralized_sen-o objective instance.
+
+Parameters
+----------
+op : pyci.sparse_op
+    Sparse operator instance with ``nproj`` rows ("P" space) and ``nconn`` columns ("S" space).
+wfn : pyci.doci_wfn
+    NonSingletCI wave function with ``nconn`` determinants.
+    The first ``nproj`` determinants should correspond to the "S" space.
+idx_det_cons : np.ndarray(Nd, dtype=pyci.c_long)
+    The indices of the determinants on which constraints should be placed.
+det_cons : np.ndarray(Nd, dtype=pyci.c_double)
+    The values to which the constrained determinants' overlaps should be equal.
+idx_param_cons : np.ndarray(Np, dtype=pyci.c_long)
+    The indices of the parameters on which constraints should be placed.
+param_cons : np.ndarray(Np, dtype=pyci.c_double)
+    The values to which the constrained parameters should be equal.
+
+)""",
+    py::arg("op"), py::arg("wfn"), py::arg("idx_det_cons") = py::none(), py::arg("det_cons") = py::none(),
+    py::arg("idx_param_cons") = py::none(), py::arg("param_cons") = py::none());
+
 END_MODULE
diff --git a/pyci/src/fanci.cpp b/pyci/src/fanci.cpp
index c5f9f10..9b20ea4 100644
--- a/pyci/src/fanci.cpp
+++ b/pyci/src/fanci.cpp
@@ -258,4 +258,6 @@ template class Objective<FullCIWfn>;
 
 template class Objective<GenCIWfn>;
 
+template class Objective<NonSingletCI>;
+
 } // namespace pyci
diff --git a/pyci/src/fullciwfn.cpp b/pyci/src/fullciwfn.cpp
index b532611..494a23e 100644
--- a/pyci/src/fullciwfn.cpp
+++ b/pyci/src/fullciwfn.cpp
@@ -14,6 +14,7 @@
  * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
 
 #include <pyci.h>
+#include <iostream>
 
 namespace pyci {
 
diff --git a/pyci/src/genciwfn.cpp b/pyci/src/genciwfn.cpp
index b3d3f44..4f92ecd 100644
--- a/pyci/src/genciwfn.cpp
+++ b/pyci/src/genciwfn.cpp
@@ -14,7 +14,7 @@
  * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
 
 #include <pyci.h>
-
+#include <iostream>
 namespace pyci {
 
 GenCIWfn::GenCIWfn(const GenCIWfn &wfn) : OneSpinWfn(wfn) {
@@ -26,20 +26,55 @@ GenCIWfn::GenCIWfn(GenCIWfn &&wfn) noexcept : OneSpinWfn(wfn) {
 GenCIWfn::GenCIWfn(const DOCIWfn &wfn) : GenCIWfn(FullCIWfn(wfn)) {
 }
 
+// GenCIWfn::GenCIWfn(const FullCIWfn &wfn) : OneSpinWfn(wfn.nbasis * 2, wfn.nocc, 0) {
 GenCIWfn::GenCIWfn(const FullCIWfn &wfn) : OneSpinWfn(wfn.nbasis * 2, wfn.nocc, 0) {
     ndet = wfn.ndet;
-    dets.resize(wfn.ndet * wfn.nword2);
+    dets.resize(wfn.ndet * wfn.nword);
     AlignedVector<long> occs(wfn.nocc);
+    std::cout << "Inside GenCIWfn constructor" << std::endl;
+    std::cout << "wfn.nbasis: " << wfn.nbasis << ", wfn.nocc: " << wfn.nocc << std::endl;
+    std::cout << "wfn.nocc: " << wfn.nocc << ", wfn.nocc_up, _dn: " << wfn.nocc_up << ", " << wfn.nocc_dn  << std::endl;
+    std::cout << "wfn.ndet: " << wfn.ndet << std::endl;
     long *occs_up = &occs[0], *occs_dn = &occs[wfn.nocc_up];
     long j, k = 0;
-    for (long i = 0; i < wfn.ndet; ++i) {
+
+    std::cout << "occs: " ;
+    for (int i = 0; i < wfn.nocc; i++) {
+        std::cout << occs[i] << " ";
+    }
+    std::cout << std::endl;
+    
+    for (long i = 0; i < wfn.ndet; i += 1) {
         fill_occs(wfn.nword, wfn.det_ptr(i), occs_up);
-        fill_occs(wfn.nword, wfn.det_ptr(i), occs_dn);
+        fill_occs(wfn.nword, wfn.det_ptr(i) + wfn.nword, occs_dn);
+
+        std::cout << "\nwfn.det_ptr(" << i << "): " << wfn.det_ptr(i)[0] << " " << wfn.det_ptr(i)[1] << std::endl;
+        std::cout << "occs_up: " ;
+        for (int l = 0; l < wfn.nocc_up; l++) {
+            std::cout << occs_up[l] << " ";
+        }
+        std::cout << std::endl;
+        std::cout << "occs_dn: " ;
+        for (int l = 0; l < wfn.nocc_dn; l++) {
+            std::cout << occs_dn[l] << " ";
+        }
+        std::cout << std::endl;
+
         for (j = 0; j < wfn.nocc_dn; ++j)
             occs_dn[j] += wfn.nbasis;
+
+        std::cout << "updated occs_dn: ";
+        for(int l = 0; l < wfn.nocc_dn; l++) {
+            std::cout << occs_dn[l] << " ";
+        }
+        std::cout << std::endl;
+
+
         fill_det(wfn.nocc, occs_up, &dets[k]);
         dict[rank_det(&dets[k])] = i;
-        k += wfn.nword2;
+        std::cout << "det[" << k << "]: " << dets[k] << std::endl;
+        k += wfn.nword;
+           
     }
 }
 
diff --git a/pyci/src/nonsingletci.cpp b/pyci/src/nonsingletci.cpp
new file mode 100644
index 0000000..8bf6841
--- /dev/null
+++ b/pyci/src/nonsingletci.cpp
@@ -0,0 +1,621 @@
+/* This file is part of PyCI.
+ *
+ * PyCI is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * PyCI is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <pyci.h>
+#include <iostream>
+
+namespace pyci {
+
+NonSingletCI::NonSingletCI(const NonSingletCI &wfn) : GenCIWfn(wfn) {
+}
+
+NonSingletCI::NonSingletCI(NonSingletCI &&wfn) noexcept : GenCIWfn(wfn) {
+}
+
+NonSingletCI::NonSingletCI(const DOCIWfn &wfn) : GenCIWfn(wfn){
+}
+
+NonSingletCI::NonSingletCI(const FullCIWfn &wfn) : GenCIWfn(wfn){
+}
+
+NonSingletCI::NonSingletCI(const std::string &filename) : GenCIWfn(filename) {
+}
+
+NonSingletCI::NonSingletCI(const long nb, const long nu, const long nd) : GenCIWfn(nb, nu, nd) {
+}
+
+NonSingletCI::NonSingletCI(const long nb, const long nu, const long nd, const long n, const ulong *ptr)
+    : GenCIWfn(nb, nu, nd, n, ptr) {
+}
+
+NonSingletCI::NonSingletCI(const long nb, const long nu, const long nd, const long n, const long *ptr)
+    : GenCIWfn(nb, nu, nd, n, ptr) {
+}
+
+NonSingletCI::NonSingletCI(const long nb, const long nu, const long nd, const Array<ulong> array)
+    : NonSingletCI(nb, nu, nd, array.request().shape[0],
+                   reinterpret_cast<const ulong *>(array.request().ptr)) {
+}
+
+NonSingletCI::NonSingletCI(const long nb, const long nu, const long nd, const Array<long> array)
+    : NonSingletCI(nb, nu, nd, array.request().shape[0],
+                   reinterpret_cast<const long *>(array.request().ptr)) {
+}
+
+
+// Function to generate cartesian product from a vector of pairs
+// In our case: this function is used to generate all possible combinations of
+// occupied orbitals to excite from for singles given pair of occupied orbitals in ref det
+std::vector<std::vector<long>> NonSingletCI::generate_cartesian_product(
+    const AlignedVector<std::pair<int,int>>& pairs, std::size_t k) {
+    std::vector<std::vector<long>> result;
+    // std::cout << "Inside nonsingletci/generate_cartesian_product" << std::endl;
+    // std::cout <<   "pairs.size(): " << pairs.size() << std::endl;
+    // std::cout << "k: " << k << std::endl;
+
+    std::size_t nocc_pairs = pairs.size();
+    if (k > nocc_pairs) return result;
+    std::vector<std::vector<long>> temp_combinations = {{}};
+
+    
+    for (const auto& pair: pairs) {
+        std::vector<std::vector<long>> new_result;
+
+        // For each combination in the current result, 
+        // extend it with elements of the pair
+        for (const auto& combination: temp_combinations) {
+            if (combination.size() < k){
+                for (auto elem : {pair.first, pair.second}) {
+                    auto new_combination = combination;
+                    new_combination.push_back(elem);
+                    new_result.push_back(new_combination);
+                }
+            }
+        }
+        // Move the new combination into the result
+        temp_combinations = std::move(new_result);  
+    }
+    // Filter out combinations that are not of size k
+    for (const auto& combination: temp_combinations) {
+        if (combination.size() == k) {
+            result.push_back(combination);
+        }
+    
+    }
+    // Debug output to ensure combinations are being generated
+    // std::cout << "Generated " << result.size() << " combinations" << std::endl;
+    
+    return result;
+}
+
+
+// Function to generate combinations, based on indices
+std::vector<std::vector<long>> NonSingletCI::generate_combinations(std::size_t n, std::size_t k) {
+    std::vector<std::vector<long>> combinations;
+
+    if (k > n) return combinations;
+
+    std::vector<long> indices(n);
+    std::iota(indices.begin(), indices.end(), 0);
+
+    std::vector<bool> mask(n, false);
+    std::fill(mask.begin(), mask.begin() + k, true);
+
+    do {
+        std::vector<long> combination;
+        for (std::size_t i = 0; i < n; ++i) {
+            if (mask[i]) combination.push_back(indices[i]);
+        }
+        combinations.push_back(combination);
+    } while (std::prev_permutation(mask.begin(), mask.end()));
+    
+    return combinations;
+}
+
+template <typename T>
+void NonSingletCI::print_vector(const std::string& name, const AlignedVector<T>& vec) {
+    std::cout << name << ": ";
+    for (const auto& elem : vec) {
+        std::cout << elem << " ";
+    }
+    std::cout << std::endl;
+}
+void NonSingletCI::print_pairs(const std::string& label, const AlignedVector<std::pair<int, int>>& pairs) {
+    std::cout << label << ": ";
+    for (const auto& elem : pairs) {
+        std::cout << elem.first << " " << elem.second << " ";
+    }
+    std::cout << std::endl;
+}
+
+long NonSingletCI::calc_sindex(const long occ, const long vir) const {
+    long o, v;
+    long nb = nbasis / 2;
+    if (vir < nb) {
+        v = vir - nocc / 2;
+    } else {
+        v = vir - nocc;
+    }                  
+    if (occ >= nb) {
+        o = occ - (nb - nocc / 2);
+    } else{
+        o = occ;
+    }
+    // std::cout << "\nocc: " << occ << ", vir: " << vir << std::endl;
+    // std::cout << "o: " << o << ", v: " << v << std::endl;
+    // std::cout << "nocc: " << nocc << ", nb: " << nb << std::endl;
+
+    // long idx = nocc / 2 * (nbasis - nocc) / 2 +  o * (nbasis - nocc) + v; //considering doubles are listed first
+
+    long idx = o * (nbasis - nocc) + v; // considering singles are listed first
+    // std::cout << "idx: " << idx << std::endl;
+    return idx;
+}
+
+long NonSingletCI::calc_pindex(const long occ, const long vir) const {
+    long o, v;
+    // Assuming that only alpha orbitals are provided in the input
+    if (vir < nbasis / 2) {
+        v = vir - nocc / 2;
+    } else {
+        v = vir - nocc;
+    }                  
+    if (occ >= nbasis / 2) {
+        o = occ - (nbasis / 2 - nocc / 2);
+    } else {
+        o = occ;
+    }
+    // long idx = (nbasis / 2 - nocc / 2) * o + v; //considering doubles are listed first
+    long idx = (nbasis - nocc) * nocc + (nbasis / 2 - nocc / 2) * o + v; //considering singles are listed first
+    return idx;
+}
+
+void NonSingletCI::add_excited_dets(const ulong *rdet, const long e){
+    // std::cout << "Inside nonsingletci/add_excited_dets" << std::endl;
+    // //long i, j, k, no = binomial(nocc_up, e), nv = binomial(nvirs_up, e);
+    // std::cout << "nocc: " << nocc << ", nvir: " << nvir << std::endl;
+    // std::cout << "nocc_up: " << nocc_up << ", nvir_up: " << nvir_up << std::endl;
+    // std::cout << "nocc_dn: " << nocc_dn << ", nvir_dn: " << nvir_dn << std::endl;
+    // std::cout << "nbasis: " << nbasis << std::endl;
+
+    AlignedVector<ulong> det(nword);
+    AlignedVector<long> occs(nocc);
+    AlignedVector<long> virs(nbasis - nocc);
+    AlignedVector<long> occs_up, occs_dn, virs_up, virs_dn;
+    occs_up.reserve(occs.size());
+    occs_dn.reserve(occs.size());
+    virs_up.reserve(occs.size());
+    virs_dn.reserve(occs.size());
+
+    AlignedVector<std::pair<int,int>> occ_pairs;
+    AlignedVector<std::pair<int,int>> vir_pairs;
+    occ_pairs.reserve(occs_up.size());
+    vir_pairs.reserve(virs.size());
+
+    AlignedVector<long> occinds(e + 1);
+    AlignedVector<long> virinds(e + 1);
+    fill_occs(nword, rdet, &occs[0]);
+    fill_virs(nword, nbasis, rdet, &virs[0]);
+
+    for (int i : occs) {
+        (i < nbasis / 2 ? occs_up : occs_dn).push_back(i);
+    }
+
+    for (int a : virs) {
+        (a < nbasis / 2 ? virs_up : virs_dn).push_back(a);
+    }
+
+    // Efficient resizing
+    occs_up.shrink_to_fit();
+    occs_dn.shrink_to_fit();
+    virs_up.shrink_to_fit();
+    virs_dn.shrink_to_fit();
+
+    // std::cout << "nocc_up: " << nocc_up << ", nvir_up: " << nvir_up << std::endl;
+    // std::cout << "nocc: " << nocc << ", nvir: " << nvir << std::endl;
+    // std::cout << "e: " << e << std::endl;
+    // std::cout << "rdet: " << *rdet << std::endl;
+    // print_vector("occs", occs);
+    // print_vector("virs", virs);
+    // print_vector("occs_up", occs_up);
+    // print_vector("occs_dn", occs_dn);
+    // print_vector("virs_up", virs_up);
+    // print_vector("virs_dn", virs_dn);
+
+    // Create an unordered set for fast lookup of occupied down-orbitals
+    std::unordered_set<int> occ_dn_set(occs_dn.begin(), occs_dn.end()); 
+    std::unordered_set<int> virs_set(virs.begin(), virs.end());
+
+    // Generate occ_pairs and vir_pairs
+    for (int i : occs_up) {
+        if (occ_dn_set.find(i + nbasis/2) != occ_dn_set.end()) {
+            occ_pairs.push_back({i, i + nbasis/2});   
+        }
+    }
+
+    for (int a : virs) {
+        if (virs_set.find(a + nbasis/2) != virs_set.end()) {
+            vir_pairs.push_back({a, a + nbasis/2});   
+        }
+    }
+    
+    // print_pairs("occ_pairs", occ_pairs);
+    // print_pairs("vir_pairs", vir_pairs);
+    
+
+    // Handle excitation order 0
+    if (e == 0) {
+        // std::cout << "Handling excitation order 0" << std::endl;
+        
+        std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
+        add_det(&det[0]);
+        
+        return ;
+    }
+    // Handle excitation order 1
+    if (e == 1) {
+        // Flatten occ_pairs into a single-dimensional array
+        std::vector<long> flattened_occs;
+        for (const auto& pair : occ_pairs) {
+            flattened_occs.push_back(pair.first);
+            flattened_occs.push_back(pair.second);
+        }
+        // std::cout << "-----Handling excitation order 1-----" << std::endl;
+        // std::cout << "Determinants of excitation order 1" << std::endl;
+
+        for (long occ : flattened_occs) {
+            for (long vir : virs) {
+                std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
+                excite_det(occ, vir, &det[0]);
+                add_det(&det[0]);
+                
+                // std::cout << "Determinant after excitation of " << occ << " " << vir << std::endl;
+                // for (int k = 0; k < nword; ++k) {
+                //     std::cout << det[k] << " ";
+                // }
+                // std::cout << std::endl;
+            }
+            // std::cout << std::endl;
+        }
+        // std::cout << std::endl; 
+        return ;
+
+
+        // //-----------------------pCCDSspin test-----------------------
+        // // std::cout << "-----Handling excitation order 1-----" << std::endl;
+        // for (long occ : occs_up) {
+        //     for (long vir : virs_up) {
+        //         std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
+        //         excite_det(occ, vir, &det[0]);
+        //         add_det(&det[0]);
+        //     }
+        // }
+
+        // for (long occ : occs_dn) {
+        //     for (long vir : virs_dn) {
+        //         std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
+        //         excite_det(occ, vir, &det[0]);
+        //         add_det(&det[0]);
+        //     }
+        // }
+        // return;
+        // //-----------------------pCCDSspin test-----------------------
+    }
+
+    // Handle excitation orders >= 2
+    std::size_t nocc_pairs = occ_pairs.size();
+    std::size_t nvir_pairs = vir_pairs.size();
+
+    // std::cout << "nocc_pairs: " << nocc_pairs << std::endl;
+    
+    if (e >= 2) {
+        // Iterate over possible (d,s) pairs: d pair excitations and s single excitations
+        // std::cout << "--------Handling excitation order >= 2--------" << std::endl;
+        
+        for (std::size_t d = 0; d <= std::min(static_cast<std::size_t>(e/2), nocc_pairs); ++d){
+            std::size_t num_singles = e - 2 * d;
+
+            // std::cout << "d: " << d << ", num_singles: " << num_singles << std::endl;
+            
+            // Not enough pairs for singles
+            if (num_singles > (nocc_pairs - d)) {
+                // std::cout << "Not enough pairs for singles" << std::endl;
+                continue; 
+            }
+
+            // Generate occ pair and vir pair combinations
+            std::vector<std::vector<std::vector<long>>> opair_combinations;
+            std::vector<std::vector<std::vector<long>>> vpair_combinations;
+            opair_combinations.push_back(generate_combinations(nocc_pairs, d));            
+            vpair_combinations.push_back(generate_combinations(nvir_pairs, d));
+
+
+            // std::cout << "Generated opair_combinations: " << std::endl;
+            // for (const auto& opair_comb : opair_combinations[0]) {
+            //     for (const auto& idx : opair_comb) {
+            //         std::cout << "(" << occ_pairs[idx].first << ", " << occ_pairs[idx].second << ") ";
+            //     }                
+            // }
+            // std::cout << std::endl;
+
+            
+            // std::cout << "Generated vpair_combinations: " << std::endl;
+            // for (const auto& pair_comb : vpair_combinations[0]) {
+            //     for (const auto& idx : pair_comb) {
+            //         std::cout << "(" << vir_pairs[idx].first << ", " << vir_pairs[idx].second << ") ";
+            //     }                
+            // }
+            // std::cout << std::endl;
+
+            // Process pair combinations for current d
+            for (const auto& opair_comb : opair_combinations[0]) {
+                for (const auto& vpair_comb : vpair_combinations[0]) {
+                    // if (opair_comb.empty() || vpair_comb.empty()) {
+                        // std::cout << "Error: Empty combination detected" << std::endl;
+                        // continue;
+                    // }
+
+                    std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
+                    std::vector<long> used_virs;
+                    std::vector<long> used_occs;
+                    // DetExcParamIndx det_exc;
+                    
+                    for (std::size_t idx = 0; idx < d; ++idx) {
+                        const auto& occ_pair = occ_pairs[opair_comb[idx]];
+                        const auto& vir_pair = vir_pairs[vpair_comb[idx]];
+
+                        excite_det(occ_pair.first, vir_pair.first, &det[0]);
+                        excite_det(occ_pair.second, vir_pair.second, &det[0]);
+
+
+                        used_occs.push_back(occ_pair.first);
+                        used_occs.push_back(occ_pair.second);
+                        used_virs.push_back(vir_pair.first);
+                        used_virs.push_back(vir_pair.second);
+                        
+                        // std::cout << "Determinant after pair excitation of" << std::endl;
+                        // std::cout << "occ_pair: " << occ_pair.first << " " << occ_pair.second << std::endl;
+                        // std::cout << "vir_pair: " << vir_pair.first << " " << vir_pair.second << std::endl;
+                        // for (int k = 0; k < nword; ++k) {
+                        //     std::cout << det[k] << " ";
+                        // }
+                        // std::cout << std::endl;
+                        
+                    }
+
+   
+                    if (num_singles > 0) {
+                        // Determine remaining occupied pairs
+                        AlignedVector<std::pair<int,int>> remaining_occ_pairs;
+                        for (std::size_t i = 0; i < nocc_pairs; ++i) {
+                            if (std::find(opair_comb.begin(), opair_comb.end(), i) == opair_comb.end()) {
+                                remaining_occ_pairs.push_back(occ_pairs[i]);
+                            }
+                        }
+                        // print_pairs("remaining_occ_pairs", remaining_occ_pairs);
+
+
+                        AlignedVector<long> remaining_virs;
+                        for (std::size_t i = 0; i < virs.size(); ++i) {
+                            if (std::find(used_virs.begin(), used_virs.end(), virs[i]) == used_virs.end()) {
+                                remaining_virs.push_back(virs[i]);
+                            }
+                        }
+                        // print_vector("remaining_virs", remaining_virs);
+
+
+                        // Process single combinations for current num_singles
+                        auto occ_combinations = generate_cartesian_product(remaining_occ_pairs, num_singles);
+                        auto vir_combinations = generate_combinations(remaining_virs.size(), num_singles);
+
+                        // std::cout << "Generated occ_combinations: " << std::endl;
+                        // for (const auto& occ_comb : occ_combinations) {
+                        //     for (const auto& elem: occ_comb) {
+                        //         std::cout << "(" << elem << ") ";
+                        //     }
+                        //     std::cout << std::endl;
+                        // }
+                        
+                        // std::cout << "Generated vir_combinations: " << std::endl;
+                        // for (const auto& vir_comb : vir_combinations) {
+                        //     std::cout << "(";
+                        //     for (const auto& vir_idx : vir_comb) {
+                        //         std::cout << remaining_virs[vir_idx] << " ";
+                        //     }
+                        //     std::cout << ")" << std::endl;
+                        // }
+
+                        
+                        for (const auto& occ_comb : occ_combinations) {
+                            for (const auto& vir_comb : vir_combinations) {
+                                // Do NOT reset det here; use the alredy existed det from pair excitations
+                                AlignedVector<ulong> temp_det(nword);
+                                std::memcpy(&temp_det[0], det.data(), sizeof(ulong) * nword);
+                                
+                                // Apply single excitations
+                                for (std::size_t idx = 0; idx < num_singles; ++idx){
+                                    long occ = occ_comb[idx];
+                                    long vir = remaining_virs[vir_comb[idx]];
+                                    excite_det(occ, vir, &temp_det[0]);
+                                    // std::cout << "Exciting occ: " << occ << " to vir: " << vir << std::endl;
+                                    
+                                }
+                                add_det(&temp_det[0]);
+                                // Print determinant after single excitations
+                                // std::cout << "Determinant for single combination" << std::endl;
+                                // for (int k = 0; k < nword; ++k) {
+                                //     std::cout << temp_det[k] << " ";
+                                // }
+                                // std::cout << std::endl;
+                            }
+                            
+                        }
+
+                        // //-----------------------pCCDSspin test-----------------------
+                        // AlignedVector<long> remaining_occs;
+                        // AlignedVector<long> remaining_virs;
+                        // for (long occ : occs) {
+                        //     if (std::find(used_occs.begin(), used_occs.end(), occ) == used_occs.end()) {
+                        //         remaining_occs.push_back(occ);
+                        //     }
+                        // }
+
+                        // for (long vir : virs) {
+                        //     if (std::find(used_virs.begin(), used_virs.end(), vir) == used_virs.end()) {
+                        //         remaining_virs.push_back(vir);
+                        //     }
+                        // }
+
+                        // AlignedVector<long> rem_occ_up;
+                        // AlignedVector<long> rem_occ_dn;
+                        // for (long occ : remaining_occs) {
+                        //     if (occ < nbasis / 2) {
+                        //         rem_occ_up.push_back(occ);
+                        //     } else {
+                        //         rem_occ_dn.push_back(occ);
+                        //     }
+                        // }
+
+                        // AlignedVector<long> rem_vir_up;
+                        // AlignedVector<long> rem_vir_dn;
+                        // for (long vir : remaining_virs) {
+                        //     if (vir < nbasis / 2) {
+                        //         rem_vir_up.push_back(vir);
+                        //     } else {
+                        //         rem_vir_dn.push_back(vir);
+                        //     }
+                        // }
+                   
+                        // auto occ_combs = generate_combinations(rem_occ_up.size(), num_singles);
+                        // auto vir_combs = generate_combinations(rem_vir_up.size(), num_singles);
+
+                        
+                        // for (const auto& occ_comb : occ_combs) {
+                        //     for (const auto& vir_comb : vir_combs) {
+                        //         AlignedVector<ulong> temp_det(nword);
+                        //         std::memcpy(&temp_det[0], det.data(), sizeof(ulong) * nword);
+                        //         for (std::size_t idx = 0; idx < num_singles; ++idx) {
+                        //             long occ = rem_occ_up[occ_comb[idx]];
+                        //             long vir = rem_vir_up[vir_comb[idx]];
+                        //             excite_det(occ, vir, &temp_det[0]);
+                        //         }
+                        //         add_det(&temp_det[0]);
+                        //     }
+                        // }
+
+                        // auto occ_combs_dn = generate_combinations(rem_occ_dn.size(), num_singles);
+                        // auto vir_combs_dn = generate_combinations(rem_vir_dn.size(), num_singles);
+
+                        // for (const auto& occ_comb : occ_combs_dn) {
+                        //     for (const auto& vir_comb : vir_combs_dn) {
+                        //         AlignedVector<ulong> temp_det(nword);
+                        //         std::memcpy(&temp_det[0], det.data(), sizeof(ulong) * nword);
+                        //         for (std::size_t idx = 0; idx < num_singles; ++idx) {
+                        //             long occ = rem_occ_dn[occ_comb[idx]];
+                        //             long vir = rem_vir_dn[vir_comb[idx]];
+                        //             excite_det(occ, vir, &temp_det[0]);
+                        //         }
+                        //         add_det(&temp_det[0]);
+                        //     }
+                        // }
+
+                        // //-----------------------pCCDSspin test-----------------------
+                    }   
+                    else if (num_singles == 0){
+                        // If num_singles == 0, directly add determinant after pair excitations
+                        add_det(&det[0]);
+                        // Print determinant after single excitations
+                        // std::cout << "Determinant for s=0" << std::endl;
+                        // for (int k = 0; k < nword; ++k) {
+                        //     std::cout << det[k] << " ";
+                        // }
+                        // std::cout << std::endl; 
+                    }
+                }    
+            }
+        }
+        return ;
+    }
+}
+
+ 
+
+
+void NonSingletCI::fill_hartreefock_det(long nb2, long nocc, ulong *det) const {
+   /* GenCIWfn build using FullCIWfn initializes the OneSpinWfn with nbasis * 2, so we are calling it nb2 here*/
+    long nb = nb2/2;
+    // Ensure nocc is even
+    if (nocc % 2 != 0) {
+        throw std::invalid_argument("Number of occupied orbitals (nocc) must be even.");
+    }
+    long nocc_beta = nocc / 2;
+    long nocc_alpha = nocc / 2;
+    // long num_ulongs = (nb2 + Size<ulong>() - 1) / Size<ulong>();
+    
+
+    // std::cout << "Inside nonsingletci/fill_hartreefock_det" << std::endl;
+    // std::cout << "nb: " << nb << std::endl;
+    // std::cout << "nocc: " << nocc << std::endl;
+    // std::cout << "nocc_beta: " << nocc_beta << std::endl;
+    // std::cout << "nocc_alpha: " << nocc_alpha << std::endl;
+
+    // std::cout << "Filling beta spin orbitals" << std::endl;
+    for (long i = 0; i < nocc_beta; ++i) {
+        long bit_index = nb + i;
+        // std::cout << "bit_index: " << bit_index << std::endl;
+        det[bit_index / Size<ulong>()] |= 1UL << (bit_index % Size<ulong>());
+    }
+    
+    // std::cout << "Filling alpha spin orbitals" << std::endl;
+    for (long i = 0; i < nocc_alpha; ++i) {
+        // std::cout << "bit_index: " << i << std::endl;
+        long bit_index = i;
+        det[bit_index / Size<ulong>()] |= 1UL << (bit_index % Size<ulong>());
+    }
+    // std::cout << "det: ";
+    // for (int i = 0; i < num_ulongs; ++i) {
+    //     std::cout << det[i] << " ";
+    // }
+    // std::cout << std::endl;
+}
+
+
+
+/**
+ * @brief Adds excited determinants to the current wavefunction.
+ *
+ * This function generates excited determinants based on the given excitation level
+ * and reference determinant. If no reference determinant is provided, it uses the
+ * Hartree-Fock determinant.
+ *
+ * @param exc The excitation level.
+ * @param ref The reference determinant as a pybind11 object. If none, Hartree-Fock determinant is used.
+ * @return The number of new determinants added.
+ */
+long NonSingletCI::py_add_excited_dets(const long exc, const pybind11::object ref) {
+    AlignedVector<ulong> v_ref;
+    ulong *ptr;
+    if (ref.is(pybind11::none())) {
+        v_ref.resize(nword);
+        ptr = &v_ref[0];
+        fill_hartreefock_det(nbasis, nocc, ptr);
+    } else {
+        ptr = reinterpret_cast<ulong *>(ref.cast<Array<ulong>>().request().ptr);
+    }
+    long ndet_old = ndet;
+    add_excited_dets(ptr, exc);
+    return ndet - ndet_old;
+}
+} //namespace pyci
diff --git a/pyci/src/onespinwfn.cpp b/pyci/src/onespinwfn.cpp
index 4cba643..9c9f183 100644
--- a/pyci/src/onespinwfn.cpp
+++ b/pyci/src/onespinwfn.cpp
@@ -14,7 +14,8 @@
  * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
 
 #include <pyci.h>
-
+#include <iostream>
+#include <bitset>
 namespace pyci {
 
 OneSpinWfn::OneSpinWfn(const OneSpinWfn &wfn) : Wfn(wfn) {
@@ -225,6 +226,7 @@ void OneSpinWfn::add_excited_dets(const ulong *rdet, const long e) {
     AlignedVector<long> virinds(e + 1);
     fill_occs(nword, rdet, &occs[0]);
     fill_virs(nword, nbasis, rdet, &virs[0]);
+    
     for (k = 0; k < e; ++k)
         virinds[k] = k;
     virinds[e] = nvir_up + 1;
@@ -234,8 +236,11 @@ void OneSpinWfn::add_excited_dets(const ulong *rdet, const long e) {
         occinds[e] = nocc_up + 1;
         for (j = 0; j < no; ++j) {
             std::memcpy(&det[0], rdet, sizeof(ulong) * nword);
-            for (k = 0; k < e; ++k)
+            for (k = 0; k < e; ++k) {
                 excite_det(occs[occinds[k]], virs[virinds[k]], &det[0]);
+            }
+
+            std::cout << std::endl;
             add_det(&det[0]);
             next_colex(&occinds[0]);
         }
diff --git a/pyci/src/sparseop.cpp b/pyci/src/sparseop.cpp
index 1eac1ee..6b0a24a 100644
--- a/pyci/src/sparseop.cpp
+++ b/pyci/src/sparseop.cpp
@@ -14,6 +14,7 @@
  * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
 
 #include <pyci.h>
+#include <iostream>
 
 namespace pyci {
 
@@ -70,6 +71,14 @@ SparseOp::SparseOp(const SQuantOp &ham, const GenCIWfn &wfn, const long rows, co
     update<GenCIWfn>(ham, wfn, nrow, ncol, 0);
 }
 
+SparseOp::SparseOp(const SQuantOp &ham, const NonSingletCI &wfn, const long rows, const long cols,
+                   const bool symm, const std::string wfntype)
+    : nrow((rows > -1) ? rows : wfn.ndet), ncol((cols > -1) ? cols : wfn.ndet), size(0),
+      ecore(ham.ecore), symmetric(symm) {
+    append<long>(indptr, 0);
+    update<NonSingletCI>(ham, wfn, nrow, ncol, 0);
+}
+
 pybind11::object SparseOp::dtype(void) const {
     return pybind11::dtype::of<double>();
 }
@@ -183,6 +192,9 @@ template void SparseOp::py_update(const SQuantOp &, const FullCIWfn &);
 
 template void SparseOp::py_update(const SQuantOp &, const GenCIWfn &);
 
+template void SparseOp::py_update(const SQuantOp &ham, const NonSingletCI &wfn);
+// {    update<NonSingletCI>(ham, wfn, wfn.ndet, wfn.ndet, nrow);}
+
 template<class WfnType>
 void SparseOp::update(const SQuantOp &ham, const WfnType &wfn, const long rows, const long cols,
                       const long startrow) {
@@ -200,6 +212,24 @@ void SparseOp::update(const SQuantOp &ham, const WfnType &wfn, const long rows,
     size = indices.size();
 }
 
+
+void SparseOp::update(const SQuantOp &ham, const NonSingletCI &wfn, const long rows, const long cols,
+                      const long startrow) {
+    AlignedVector<ulong> det(wfn.nword);
+    AlignedVector<long> occs(wfn.nocc);
+    AlignedVector<long> virs(wfn.nbasis - wfn.nocc);
+    shape = pybind11::make_tuple(pybind11::cast(rows), pybind11::cast(cols));
+    nrow = rows;
+    ncol = cols;
+    indptr.reserve(nrow + 1);
+    for (long idet = startrow; idet < rows; ++idet) {
+        add_row(ham, wfn, idet, &det[0], &occs[0], &virs[0]);
+        sort_row(idet);
+    }
+    size = indices.size();
+}
+
+
 void SparseOp::reserve(const long n) {
     indices.reserve(n);
     data.reserve(n);
@@ -253,6 +283,7 @@ void SparseOp::add_row(const SQuantOp &ham, const DOCIWfn &wfn, const long idet,
         append<double>(data, val1 + val2 * 2);
         append<long>(indices, idet);
     }
+
     // add pointer to next row's indices
     append<long>(indptr, indices.size());
 }
@@ -399,7 +430,7 @@ void SparseOp::add_row(const SQuantOp &ham, const FullCIWfn &wfn, const long ide
                 koffset = ioffset + n2 * kk;
                 // loop over spin-down virtual indices
                 for (l = j + 1; l < wfn.nvir_dn; ++l) {
-                    ll = virs_dn[l];
+                 ;   ll = virs_dn[l];
                     // 0-2 excitation elements
                     excite_det(kk, ll, det_dn);
                     jdet = wfn.index_det(det_up);
@@ -501,4 +532,205 @@ void SparseOp::add_row(const SQuantOp &ham, const GenCIWfn &wfn, const long idet
     append<long>(indptr, indices.size());
 }
 
+
+void SparseOp::add_row(const SQuantOp &ham, const NonSingletCI &wfn, const long idet, ulong *det_up,
+                       long *occs, long *virs) {
+    long n = wfn.nbasis / 2; // nspatial
+    long n1 = wfn.nbasis; // nspin
+
+    // std::cout << "n: " << n << ", n1: " << n1 << std::endl;
+    long n2 = n1 * n1;
+    long n3 = n1 * n2;
+   
+    long i, j, k, l, ii, jj, kk, ll, jdet, jmin = symmetric ? idet : Max<long>();
+    long ioffset, koffset, sign_up;
+    double val1, val2 = 0.0;
+    const ulong *rdet_up = wfn.det_ptr(idet);
+    std::memcpy(det_up, rdet_up, sizeof(ulong) * wfn.nword); 
+
+    fill_occs(wfn.nword, rdet_up, occs);
+    fill_virs(wfn.nword, wfn.nbasis, rdet_up, virs);
+    
+    long nocc_up = __builtin_popcount(*det_up & ((1 << n) - 1));
+    long nocc_dn = wfn.nocc - nocc_up;  
+    long nvir_up = n - nocc_up;
+    long nvir_dn = n - nocc_dn;
+    long nvir = nvir_up + nvir_dn;
+    
+    // std::cout << "nocc_up: " << nocc_up << ", nocc_dn: " << nocc_dn << ", nvir_up: " << nvir_up << ", nvir_dn: " << nvir_dn << std::endl;
+
+    long *virs_up = virs;
+    long *virs_dn = nullptr;
+    for (long i = 0; i < nvir; ++i) {
+        if (virs[i] >= n) {
+            virs_dn = &virs[i];
+            break;
+        }
+    }
+    long *occs_up = occs;
+    long *occs_dn = nullptr;
+    for (long i = 0; i < wfn.nocc; ++i) {
+        if (occs[i] >= n) {
+            occs_dn = &occs[i];
+            break;
+        }
+    }
+    int counter = 0;
+
+    // loop over spin-up occupied indices
+    for (i = 0; i < nocc_up; ++i) {
+        ii = occs_up[i];
+        ioffset = n3 * ii;
+        
+        // compute part of diagonal matrix element
+        val2 += ham.one_ao[(n1 + 1) * ii];
+        for (k = i + 1; k < nocc_up; ++k) {
+            kk = occs_up[k];
+            koffset = ioffset + n2 * kk;
+            val2 += ham.two_ao[koffset + n1 * ii + kk] - ham.two_ao[koffset + n1 * kk + ii];  
+        }
+        for (k = 0; k < nocc_dn; ++k) {
+            kk = occs_dn[k];
+            val2 += ham.two_ao[ioffset + n2 * kk + n1 * ii + kk];
+        }
+        // first excitation: alpha -> alpha 
+        for (j = 0; j < nvir_up; ++j) {
+            jj = virs_up[j];
+            excite_det(ii, jj, det_up);
+            sign_up = phase_single_det(wfn.nword, ii, jj, rdet_up);
+            jdet = wfn.index_det(det_up);
+            
+            // check if the excited determinant is in wfn
+            if ((jdet != -1) && (jdet < jmin) && (jdet < ncol)) {
+                counter += 1;
+                val1 = ham.one_ao[n1 * ii + jj];
+                for (k = 0; k < nocc_up; ++k) {
+                    kk = occs_up[k];
+                    koffset = ioffset + n2 * kk;
+                    val1 += ham.two_ao[koffset + n1 * jj + kk] - ham.two_ao[koffset + n1 * kk + jj];
+                }
+                for (k = 0; k < nocc_dn; ++k) {
+                    kk = occs_dn[k];
+                    val1 += ham.two_ao[ioffset + n2 * kk + n1 * jj + kk];
+                }
+                append<double>(data, sign_up * val1);
+                append<long>(indices, jdet);
+            }
+            
+            // loop over spin-up occupied indices
+            for (k = i+1; k < nocc_up; ++k) {
+                kk = occs_up[k];
+                koffset = ioffset + n2 * kk;
+                // second excitation: alpha -> alpha
+                for (l = j + 1; l < nvir_up; ++l) {
+                    ll = virs_up[l];
+                    excite_det(kk, ll, det_up);
+                    jdet = wfn.index_det(det_up);
+                    // check if the excited determinant is in wfn
+                    if ((jdet != -1) && (jdet < jmin) && (jdet < ncol)) {
+                        counter += 1;
+                        // add the aaaa -->  aaaa matrix element
+                        append<double>(data, phase_double_det(wfn.nword, ii, kk, jj, ll, rdet_up) *
+                                                 (ham.two_ao[koffset + n1 * jj + ll] -
+                                                  ham.two_ao[koffset + n1 * ll + jj]));
+                        append<long>(indices, jdet);    
+                    }
+                    excite_det(ll, kk, det_up); 
+                }
+            }
+            
+            // loop over spin-down occupied indices
+            for (k = 0; k < nocc_dn; ++k) {
+                kk = occs_dn[k];
+                koffset = ioffset + n2 * kk;
+                // second excitation: beta -> beta
+                for (l = 0; l < nvir_dn; ++l) {
+                    ll = virs_dn[l];
+                    excite_det(kk, ll, det_up);
+                    jdet = wfn.index_det(det_up);
+                    // check if the excited determinant is in wfn
+                    if ((jdet != -1) && (jdet < jmin) && (jdet < ncol)) {
+                        double sign = phase_double_det(wfn.nword, ii, kk, jj, ll, rdet_up);
+                        counter += 1;
+                        append<double>(data, sign * ham.two_ao[ioffset + n2 * kk + n1 * jj + ll]); 
+                        append<long>(indices, jdet);
+                    }
+                    excite_det(ll, kk, det_up); 
+                }
+            }            
+            excite_det(jj, ii, det_up);
+        }
+    }
+    // loop over spin-down occupied indices
+    for (i = 0; i < nocc_dn; ++i) {
+        ii = occs_dn[i];
+        ioffset = n3 * ii;
+        // compute part of diagonal matrix element
+        val2 += ham.one_ao[(n1 + 1) * ii];
+        for (k = i + 1; k < nocc_dn; ++k) {
+            kk = occs_dn[k];
+            // if (kk >= nbasis) kk -= nbasis;
+            koffset = ioffset + n2 * kk;
+            val2 += ham.two_ao[koffset + n1 * ii + kk] - ham.two_ao[koffset + n1 * kk + ii];
+        }   
+        ii = occs_dn[i];
+ 
+        // first excitation: beta -> beta
+        for (j = 0; j < nvir_dn; ++j) {
+            jj = virs_dn[j];
+            excite_det(ii, jj, det_up);
+            jdet = wfn.index_det(det_up);
+            // check if the excited determinant is in wfn
+            if ((jdet != -1) && (jdet < jmin) && (jdet < ncol)) {
+                counter += 1;
+                val1 = ham.one_ao[n1 * ii + jj];
+                for (k = 0; k < nocc_up; ++k) {
+                    kk = occs_up[k];
+                    val1 += ham.two_ao[ioffset + n2 * kk + n1 * jj + kk];
+                }
+                for (k = 0; k < nocc_dn; ++k) {
+                    kk = occs_dn[k];
+                    koffset = ioffset + n2 * kk;
+                    val1 += ham.two_ao[koffset + n1 * jj + kk] - ham.two_ao[koffset + n1 * kk + jj];
+                }
+                append<double>(data, phase_single_det(wfn.nword, ii, jj, rdet_up) * val1);
+                append<long>(indices, jdet);
+                
+            }
+            // second excitation: beta -> beta
+            for (k = i + 1; k < nocc_dn; ++k) {
+                kk = occs_dn[k];
+                koffset = ioffset + n2 * kk;
+                kk = occs_dn[k];
+                for (l = j + 1; l < nvir_dn; ++l) {
+                    ll = virs_dn[l];
+                    // beta -> beta excitation elements
+                    excite_det(kk, ll, det_up);
+                    jdet = wfn.index_det(det_up);
+                    // check if excited determinant is in wfn
+                    if ((jdet != -1) && (jdet < jmin) && (jdet < ncol)) {
+                        double sign = phase_double_det(wfn.nword, ii, kk, jj, ll, rdet_up);
+                        counter += 1;
+                        append<double>(data, sign * (ham.two_ao[koffset + n1 * jj + ll] -
+                                                     ham.two_ao[koffset + n1 * ll + jj]));
+                        append<long>(indices, jdet);
+                    }
+                    excite_det(ll, kk, det_up);
+                }
+            }
+            excite_det(jj, ii, det_up);
+        }
+    }
+    // add diagonal element to matrix
+    if (idet < ncol) {
+        counter += 1;
+        append<double>(data, val2);
+        append<long>(indices, idet);
+    };
+    // add pointer to next row's indices
+    append<long>(indptr, indices.size());
+}
+
+
+
 } // namespace pyci
diff --git a/pyci/src/squantop.cpp b/pyci/src/squantop.cpp
index 59b6e7b..48ef0ba 100644
--- a/pyci/src/squantop.cpp
+++ b/pyci/src/squantop.cpp
@@ -25,17 +25,21 @@ SQuantOp::SQuantOp(void) {
 }
 
 SQuantOp::SQuantOp(const SQuantOp &ham)
-    : nbasis(ham.nbasis), ecore(ham.ecore), one_mo(ham.one_mo), two_mo(ham.two_mo), h(ham.h),
+    : nbasis(ham.nbasis), ecore(ham.ecore), one_mo(ham.one_mo), two_mo(ham.two_mo), 
+      one_ao(ham.one_ao), two_ao(ham.two_ao), h(ham.h),
       v(ham.v), w(ham.w), one_mo_array(ham.one_mo_array), two_mo_array(ham.two_mo_array),
+      one_ao_array(ham.one_ao_array), two_ao_array(ham.two_ao_array),
       h_array(ham.h_array), v_array(ham.v_array), w_array(ham.w_array) {
 }
 
 SQuantOp::SQuantOp(SQuantOp &&ham) noexcept
     : nbasis(std::exchange(ham.nbasis, 0)), ecore(std::exchange(ham.ecore, 0.0)),
       one_mo(std::exchange(ham.one_mo, nullptr)), two_mo(std::exchange(ham.two_mo, nullptr)),
+      one_ao(std::exchange(ham.one_ao, nullptr)), two_ao(std::exchange(ham.two_ao, nullptr)),
       h(std::exchange(ham.h, nullptr)), v(std::exchange(ham.v, nullptr)),
       w(std::exchange(ham.w, nullptr)), one_mo_array(std::move(ham.one_mo_array)),
-      two_mo_array(std::move(ham.two_mo_array)), h_array(std::move(ham.h_array)),
+      two_mo_array(std::move(ham.two_mo_array)), one_ao_array(std::move(ham.one_ao_array)),
+      two_ao_array(std::move(ham.two_ao_array)),  h_array(std::move(ham.h_array)),
       v_array(std::move(ham.v_array)), w_array(std::move(ham.w_array)) {
 }
 
@@ -112,7 +116,7 @@ SQuantOp::SQuantOp(const std::string &filename) {
     h = reinterpret_cast<double *>(h_array.request().ptr);
     v = reinterpret_cast<double *>(v_array.request().ptr);
     w = reinterpret_cast<double *>(w_array.request().ptr);
-
+    
     long n1, n2, n3;
     n1 = nbasis;
     n2 = n1 * n1;
@@ -158,6 +162,37 @@ SQuantOp::SQuantOp(const std::string &filename) {
                 two_mo[i * n3 + j * n2 + i * n1 + j] * 2 - two_mo[i * n3 + j * n2 + j * n1 + i];
         }
     }
+
+    // Spatial to spin conversion (n * n) to (2n * 2n)
+    long m1 = n1 *2;
+    long m2 = m1 * m1;
+    long m3 = m1 * m2;
+
+    one_ao_array = Array<double>({m1, m1});
+    two_ao_array = Array<double>({m1, m1, m1, m1});
+    one_ao = reinterpret_cast<double *>(one_ao_array.request().ptr);
+    two_ao = reinterpret_cast<double *>(two_ao_array.request().ptr);
+
+    for (long i = 0; i < n1; ++i) {
+        for (long j = 0; j < n1; ++j) {
+            one_ao[i * m1 + j] = one_mo[i * n1 + j];
+            one_ao[(i + n1) * m1 + (j + n1)] = one_mo[i * n1 + j];
+        }
+    }
+
+    for (long i = 0; i < n1; ++i) {
+        for (long j = 0; j < n1; ++j) {
+            for (long k = 0; k < n1; ++k) {
+                for (long l = 0; l < n1; ++l) {
+                    two_ao[i * m3 + j * m2 + k * m1 + l]  = two_mo[i * n1 * n1 * n1 + j * n1 * n1 + k * n1 + l];
+                    two_ao[(i + n1) * m3 + (j + n1) * m2 + (k + n1) * m1 + (l + n1)] = two_mo[i * n1 * n1 * n1 + j * n1 * n1 + k * n1 + l];
+                    two_ao[i * m3 + (j + n1) * m2 + k * m1 + (l + n1)] = two_mo[i * n1 * n1 * n1 + j * n1 * n1 + k * n1 + l];
+                    two_ao[(i + n1) * m3 + j * m2 + (k + n1) * m1 + l] = two_mo[i * n1 * n1 * n1 + j * n1 * n1 + k * n1 + l];
+                }
+            }
+        }
+    }
+
 }
 
 SQuantOp::SQuantOp(const double e, const Array<double> mo1, const Array<double> mo2)
diff --git a/pyci/src/twospinwfn.cpp b/pyci/src/twospinwfn.cpp
index def18ef..e0fabc6 100644
--- a/pyci/src/twospinwfn.cpp
+++ b/pyci/src/twospinwfn.cpp
@@ -14,6 +14,7 @@
  * along with PyCI. If not, see <http://www.gnu.org/licenses/>. */
 
 #include <pyci.h>
+#include <iostream>
 
 namespace pyci {
 
@@ -251,6 +252,7 @@ void TwoSpinWfn::add_excited_dets(const ulong *rdet, const long e_up, const long
     }
     OneSpinWfn wfn_up(nbasis, nocc_up, nocc_up);
     wfn_up.add_excited_dets(&rdet[0], e_up);
+
     OneSpinWfn wfn_dn(nbasis, nocc_dn, nocc_dn);
     wfn_dn.add_excited_dets(&rdet[nword], e_dn);
     AlignedVector<ulong> det(nword2);
@@ -337,8 +339,9 @@ long TwoSpinWfn::py_add_excited_dets(const long exc, const pybind11::object ref)
     long maxdn = (nocc_dn < nvir_dn) ? nocc_dn : nvir_dn;
     long a = (exc < maxup) ? exc : maxup;
     long b = exc - a;
-    while ((a >= 0) && (b <= maxdn))
+    while ((a >= 0) && (b <= maxdn)){
         add_excited_dets(ptr, a--, b++);
+    }
     return ndet - ndet_old;
 }