Source code for vinecopulas.vinecopula

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 22 16:40:09 2024


"""


import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import scipy.stats as st
from itertools import product
import networkx as nx
import matplotlib.pyplot as plt
from vinecopulas.bivariate import *

# %% Copulas


[docs]
copulas = {
    1: "Gaussian",
    2: "Gumbel0",
    3: "Gumbel90",
    4: "Gumbel180",
    5: "Gumbel270",
    6: "Clayton0",
    7: "Clayton90",
    8: "Clayton180",
    9: "Clayton270",
    10: "Frank",
    11: "Joe0",
    12: "Joe90",
    13: "Joe180",
    14: "Joe270",
    15: "Student",
}


# %% fitting vinecopula



[docs]
def fit_vinecop(u1, copsi, vine="R", printing=True):
    """
    Fit a regular vine copula to data.

    Arguments:
        *u1* :  the data, provided as a numpy array where each column contains a separate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)

        *copsi* : A list of integers referring to the copulae of interest for which the fit has to be evaluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

        *vine* : The type of vine copula that needs to be fit, either 'R', 'D', or 'C'

        *printing*: True if the fitted copula should be printed and False if not


    Returns:
     *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

     *p* : Parameters of the bivariate copulae provided as a triangular matrix.

     *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

    """
    # Reference: Dißmann et al. 2013

    v1 = []  # list for variable 1
    v2 = []  # list for  variable 2
    tauabs = []  # list for the absolute kendal tau between v1 and v2
    dimen = u1.shape[1]  # number of variables (number of columns)
    for i in range(dimen - 1):
        for j in range(i + 1, dimen):
            v1.append(int(i))  # add variable to v1
            v2.append(int(j))  # add variable to v2
            tauabs.append(
                abs(st.kendalltau(u1[:, i], u1[:, j])[0])
            )  # calculate the absolute kendall tau between v1 and v2 and add it to the taubs list

    order1 = pd.DataFrame(
        {"v1": v1, "v2": v2, "tauabs": tauabs}
    )  # put the v1, v2, and tauabs list into a dataframe for the first tree
    order1 = order1.sort_values(by="tauabs", ascending=False).reset_index(
        drop=True
    )  # sort this dataframe from highest to lowest tauabs

    # R vine
    if vine == "R":
        inde = []  # list to put used rows of order1 in
        for i in range(
            len(order1)
        ):  # loop through all the rows to include those pairs with the highest ktau first
            if i == 0:
                order2 = order1.head(
                    1
                )  # create a new dataframe where the row with the highest ktau is the first row
            else:
                if (
                    order1.v1[i] in list(order1.v2[:i])
                    or order1.v1[i] in list(order1.v1[:i])
                ) and (
                    order1.v2[i] in list(order1.v2[:i])
                    or order1.v2[i] in list(order1.v1[:i])
                ):  # see if both v1i and v2i are already in order2, first rows with unique variables need to be added to ensure all variables are included
                    continue
                else:
                    inde.append(i)  # add used rows to inde
                    order2 = pd.concat(
                        [order2, order1.loc[i].to_frame().T], ignore_index=True
                    )  # add row to dataframe order2
        if len(order2) < (
            dimen - 1
        ):  # the first tree will have the number of variables - 1 number of rows.
            for i in range(
                1, len(order1)
            ):  # loop through all the combinations again and add based on the highest ktauabs
                if i in inde:  # check if row has not been added before
                    continue
                lst = (
                    list(order2.v2[order2.v1 == order1.v2[i]])
                    + list(order2.v1[order2.v2 == order1.v2[i]])
                    + list(order2.v2[order2.v1 == order1.v1[i]])
                    + list(order2.v1[order2.v2 == order1.v1[i]])
                )
                l1 = list(order2.v2[order2.v1 == order1.v2[i]]) + list(
                    order2.v1[order2.v2 == order1.v2[i]]
                )
                lk = l1.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v2[i])
                        except:
                            pass

                        for s in l1:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l1 = l1 + ln
                        lk = lk + ln

                l2 = list(order2.v2[order2.v1 == order1.v1[i]]) + list(
                    order2.v1[order2.v2 == order1.v1[i]]
                )
                lk = l2.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v1[i])
                        except:
                            pass

                        for s in l2:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l2 = l2 + ln
                        lk = lk + ln
                skip = False
                for val in l1:
                    if val in l2:
                        skip = True
                        break
                if skip == True:
                    continue
                # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1): # ensure that there are no groups of variables that all have a connection with one another
                #  continue
                if len(lst) == len(
                    set(lst)
                ):  # check that there are no 'triangles', e.g. three variables that are alll connected with eachother
                    order2 = pd.concat(
                        [order2, order1.loc[i].to_frame().T], ignore_index=True
                    )  # add row if conditions are met
                if (
                    len(order2) == dimen - 1
                ):  # end loop when desired number of rows (number of variables - 1) has been met
                    break

    # D vine
    elif vine == "D":
        inde = []  # lsit to put used variables in
        for j in range(dimen - 1):
            if j == 0:
                order2 = order1.head(
                    1
                )  # loop through all the rows to include those pairs with the highest ktau first
                vi = order2.v1[0]  # select the first variable included in this row
                vj = order2.v2[0]  # select the second variable included in this row
            else:
                for i in range(len(order1)):
                    if (
                        order1.v1[i] == vj
                        or order1.v2[i] == vi
                        or order1.v1[i] == vi
                        or order1.v2[i] == vj
                    ):  # find the rows which include at least one of the two variables from the previous edge
                        if (
                            order1.v1[i] in inde or order1.v2[i] in inde
                        ):  # check if variables have already been connected to other variables twice
                            continue
                        if (order1.v1[i] == vi and order1.v2[i] == vj) or (
                            order1.v1[i] == vj and order1.v2[i] == vi
                        ):  # skip rows that have already been used
                            continue
                        else:
                            order2 = pd.concat(
                                [order2, order1.loc[i].to_frame().T], ignore_index=True
                            )  # if conditions are met, add this to the dataframe of the first tree
                            if (
                                order1.v1[i] == vj or order1.v2[i] == vj
                            ):  # check which value has been used twice already
                                inde.append(vj)  # add this value to inde
                                if order1.v1[i] == vj:
                                    vj = order1.v2[
                                        i
                                    ]  # select the new edge to connect to
                                else:
                                    vj = order1.v1[
                                        i
                                    ]  # select the new edge to connect to
                            elif (
                                order1.v1[i] == vi or order1.v2[i] == vi
                            ):  # check which value has been used twice already
                                inde.append(vi)  # add this value to inde
                                if order1.v1[i] == vi:
                                    vi = order1.v2[
                                        i
                                    ]  # select the new edge to connect to
                                else:
                                    vi = order1.v1[
                                        i
                                    ]  # select the new edge to connect to
    elif vine == "C":
        taus = []  # list to put the sum of all tauabs in for a specific variable
        for i in range(dimen):
            taus.append(
                sum(order1[(order1.v1 == i) | (order1.v2 == i)].tauabs)
            )  # calculate the sum of all taus for a specific variable
        i = np.where(np.array(taus) == max(taus))[0][
            0
        ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
        order2 = order1[(order1.v1 == i) | (order1.v2 == i)].reset_index(
            drop=True
        )  # select the rows that include this variable

    order1 = order2  # make order1 == the first tree
    del order2
    rhos = []  # list for the rhos
    node = []  # list for the nodes
    cops = []  # list for the copulas
    v1_1 = []  # list for the 1st nodes
    v2_1 = []  # list for the 2nd nodes
    aics = []  # list for the AIC's
    for i in range(len(order1)):
        v1i = int(order1.v1[i])  # first node
        v2i = int(order1.v2[i])  # second node
        u3 = np.vstack(
            (u1[:, v1i], u1[:, v2i])
        ).T  # stacking the combination to fit copula to
        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
        aics.append(aic)  # add AIC to aics
        rhos.append(rho)  # add parameters to rhos
        cops.append(cop)  # add copula to cops
        node.append([v1i, v2i])  # create final node
        v1_1.append(u1[:, v1i])  # add array of first node
        v2_1.append(u1[:, v2i])  # add array of second node
    v1_1 = np.array(v1_1).T  # v1
    v2_1 = np.array(v2_1).T  # v2

    # add information to dataframe of the first tree
    order1["rhos"] = rhos
    order1["node"] = node
    order1["tree"] = 0
    order1["cop"] = cops
    order1["AIC"] = aics

    # set up variables for the second tree
    v1 = []
    v2 = []
    ktau = []
    rhos = []
    node = []
    cops = []
    v1_k = []
    v2_k = []
    aics = []
    for i in range(
        len(order1) - 1
    ):  # loop through the first tree to identify all possible combination of nodes
        v1i = int(order1.v1[i])  # parent node 1 from nodei in tree
        v2i = int(order1.v2[i])  # parent node 2 from nodei in tree
        copi = int(order1.cop[i])  # copula of nodei
        pari = order1.rhos[i]  # parameters of nodei
        for j in (
            np.where(
                np.array([item == v1i for item in list(order1.v1[i + 1 :])])
                | np.array([item == v1i for item in list(order1.v2[i + 1 :])])
                | np.array([item == v2i for item in list(order1.v1[i + 1 :])])
                | np.array([item == v2i for item in list(order1.v2[i + 1 :])])
            )[0]
            + i
            + 1
        ):  # see if possible connection between nodei and nodej
            v1j = int(order1.v1[j])  # parent node 1 from nodej in tree
            v2j = int(order1.v2[j])  # parent node 2 from nodej in tree
            copj = int(order1.cop[j])  # copula of nodei
            parj = order1.rhos[j]  # parameters of nodei
            v1.append(order1.node[i])  # parent node for next tree
            v2.append(order1.node[j])  # parent node for next tree
            lst = order1.node[i] + order1.node[j]  # list of all variables in node
            s = max(
                set(lst), key=lst.count
            )  # variable that is common in both parent nodes
            # parent node values
            ui1 = v1_1[:, i]
            ui2 = v2_1[:, i]
            uj1 = v1_1[:, j]
            uj2 = v2_1[:, j]
            # defining which parent node the conditional CDF needs to be based on
            if v1i == s:
                uni = 1
                vi1 = v2i
            else:
                uni = 2
                vi1 = v1i
            if v1j == s:
                unj = 1
                vj1 = v2j
            else:
                unj = 2
                vj1 = v1j
            # calculate the conditional CDF
            v1igs = hfunc(copi, ui1, ui2, pari, un=uni)
            v2jgs = hfunc(copj, uj1, uj2, parj, un=unj)
            ktau.append(
                abs(st.kendalltau(v1igs, v2jgs)[0])
            )  # calculate the absolute kendall tau between v1igs and v2jgs, add it to the ktau list
            # add the parent node values
            v1_k.append(v1igs)
            v2_k.append(v2jgs)
            # format the final node
            node.append([vi1, vj1, "g", s])

    k = 2  # second tree

    orderk = pd.DataFrame(
        {"v1": v1, "v2": v2, "tauabs": ktau, "node": node}
    )  # put the v1, v2, tauabs, and the node in list into a dataframe for tree k

    if vine == "R" or vine == "D":
        if (
            len(orderk) > dimen - k
        ):  # the tree will have the number of variables - k number of rows, if this is exceeded in orderk, a selection has to be made based on kendal tau
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            indexes = list(
                orderk.index
            )  # create a list the original order of the dataframe prior to sorting
            orderk = orderk.reset_index(drop=True)  # reset the index
            inde = (
                []
            )  #  list to put used rows of orderk in based on their oroginal position in the dataframe
            inde2 = []  # list to put used rows of orderk i
            for i in range(len(orderk)):
                if i == 0:
                    order = orderk.head(
                        1
                    )  # loop through all the rows to include those pairs with the highest ktau first
                    inde.append(
                        indexes[i]
                    )  # add used rows to inde (original row value)

                else:
                    if (
                        orderk.v1[i] in list(orderk.v2[:i])
                        or orderk.v1[i] in list(orderk.v1[:i])
                    ) and (
                        orderk.v2[i] in list(orderk.v2[:i])
                        or orderk.v2[i] in list(orderk.v1[:i])
                    ):
                        continue
                    else:
                        inde.append(
                            indexes[i]
                        )  # add used rows to inde (original row value)
                        inde2.append(i)  # add used rows to inde
                        order = pd.concat(
                            [order, orderk.loc[i].to_frame().T], ignore_index=True
                        )  # if conditions are met, add this to the dataframe of tree k

            if len(order) < (dimen - k):
                for i in range(1, len(orderk)):
                    if i in inde2:
                        continue
                    lst = (
                        list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v1[i])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v1[i])
                            ]
                        )
                        + list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v2[i])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v2[i])
                            ]
                        )
                    )
                    l1 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v1[i])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v1[i])]
                    )
                    lk = l1.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v1[i]))
                            except:
                                pass

                            for s in l1:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l1 = l1 + ln
                            lk = lk + ln

                    l2 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v2[i])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v2[i])]
                    )
                    lk = l2.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v2[i]))
                            except:
                                pass

                            for s in l2:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l2 = l2 + ln
                            lk = lk + ln
                    skip = False
                    for val in l1:
                        if val in l2:
                            skip = True
                            break
                    if skip == True:
                        continue
                    # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):  # ensure that there are no groups of variables that all have a connection with one another
                    #   continue
                    if len(lst) == len(set(lst)):
                        inde.append(
                            indexes[i]
                        )  # add used rows to inde (original row value)
                        order = pd.concat(
                            [order, orderk.loc[i].to_frame().T], ignore_index=True
                        )  # if conditions are met, add this to the dataframe of tree k
                    if (
                        len(order) == dimen - k
                    ):  # end loop when desired number of rows (number of variables - k) has been met
                        break
            orderk = order  # maker order = to tree k
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            v1_k = np.array([v1_k[ind] for ind in inde]).T  # sort array of first node
            v2_k = np.array([v2_k[ind] for ind in inde]).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()

        else:
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            v1_k = np.array(
                [v1_k[ind] for ind in orderk.index]
            ).T  # sort array of first node
            v2_k = np.array(
                [v2_k[ind] for ind in orderk.index]
            ).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()
    if vine == "C":
        if len(orderk) > dimen - k:
            subnodes = np.unique(
                np.stack((np.array(orderk.v1), np.array(orderk.v2)))
            )  # see all the unique parent nodes
            taus = []  # list to put the sum of all tauabs in for a specific nodes
            for i in range(len(subnodes)):
                orderksub = orderk[
                    (orderk["v1"].apply(lambda x: x == subnodes[i]))
                    | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                ]
                taus.append(
                    sum(orderksub.tauabs)
                )  # calculate the sum of all taus for a specific variable
            i = np.where(np.array(taus) == max(taus))[0][
                0
            ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
            orderk = orderk[
                (orderk["v1"].apply(lambda x: x == subnodes[i]))
                | (orderk["v2"].apply(lambda x: x == subnodes[i]))
            ]  # select rows where this node is included
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            inde = list(orderk.index)
            v1_k = np.array([v1_k[ind] for ind in inde]).T  # sort array of first node
            v2_k = np.array([v2_k[ind] for ind in inde]).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()
        else:
            orderk = orderk.sort_values(by="tauabs", ascending=False)
            v1_k = np.array([v1_k[ind] for ind in orderk.index]).T
            v2_k = np.array([v2_k[ind] for ind in orderk.index]).T
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()

    for i in range(len(order2)):
        u3 = np.vstack(
            (v1_2[:, i], v2_2[:, i])
        ).T  # stacking the combination to fit copula to
        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
        aics.append(aic)  # add AIC to aics
        rhos.append(rho)  # add parameters to rhos
        cops.append(cop)  # add copula to cops

    # add information to dataframe of tree k
    order2["rhos"] = rhos
    order2["cop"] = cops
    order2["AIC"] = aics

    # loop through remaining trees if there are more than 3 variables
    if dimen > 3:
        for k in range(3, dimen):
            order = locals()[
                "order" + str(k - 1)
            ].copy()  # select the struture of the previous tree
            v1s = locals()[
                "v1_" + str(k - 1)
            ].copy()  # select the first nodes of the previous tree
            v2s = locals()[
                "v2_" + str(k - 1)
            ].copy()  # select the second nodes of the previous tree
            # create lists for the variables
            v1_k = []
            v2_k = []
            v1 = []
            v2 = []
            ktau = []
            rhos = []
            cops = []
            node = []
            lk = []
            aics = []
            rk = []
            for i in range(len(order) - 1):
                v1i = order.v1[i].copy()  # parent node 1 from nodei in tree
                v2i = order.v2[i].copy()  # parent node 2 from nodei in tree
                copi = int(order.cop[i])  # copula of nodei
                pari = order.rhos[i]  # parameters of nodei
                for j in (
                    np.where(
                        np.array([item == v1i for item in list(order.v1[i + 1 :])])
                        | np.array([item == v1i for item in list(order.v2[i + 1 :])])
                        | np.array([item == v2i for item in list(order.v1[i + 1 :])])
                        | np.array([item == v2i for item in list(order.v2[i + 1 :])])
                    )[0]
                    + i
                    + 1
                ):  # see if possible connection between nodei and nodej
                    v1i = order.v1[i].copy()  # parent node 1 from nodei in tree
                    v2i = order.v2[i].copy()  # parent node 2 from nodei in tree
                    copj = int(order.cop[j])  # copula of nodei
                    parj = order.rhos[j]  # parameters of nodei
                    nodei = order.node[i]  # nodei
                    nodej = order.node[j]  # nodej
                    v1j = order.v1[j].copy()  # parent node 1 from nodej in tree
                    v2j = order.v2[j].copy()  # parent node 2 from nodej in tree
                    v1.append(nodei)  # new parent nodes 1
                    v2.append(nodej)  # new parent nodes 2
                    n = 2
                    ri = nodei[n + 1 :]  # select the values on the right side of node i
                    rj = nodej[n + 1 :]  # select the values on the right side of node i

                    if "g" in v1j:
                        v1j.remove("g")
                        v2j.remove("g")
                        v1i.remove("g")
                        v2i.remove("g")

                    # define left and right side of the node in tree k
                    if rj == ri:
                        if len(v1j) == 2:
                            lst = nodei[:n] + nodej[:n]
                            r3 = [max(set(lst), key=lst.count)]
                            li = [value for value in nodei[:n] if value not in r3]
                            lj = [value for value in nodej[:n] if value not in r3]
                            r = list(np.unique(ri + r3))
                        else:
                            lst = v1i[:n] + v2i[:n] + v1j[:n] + v2j[:n]
                            for s in ri:
                                lst = [x for x in lst if x != s]
                            r3 = [max(set(lst), key=lst.count)]
                            li = [value for value in nodei[:n] if value not in r3]
                            lj = [value for value in nodej[:n] if value not in r3]
                            r = list(np.unique(ri + r3))
                        l = list(np.unique(li + lj))
                    else:
                        r = list(np.unique(ri + rj))
                        li = [value for value in nodei[:n] if value not in rj]
                        lj = [value for value in nodej[:n] if value not in ri]
                        if li == lj:
                            lst = v1i[:n] + v2i[:n] + v1j[:n] + v2j[:n]
                            lst = [x for x in lst if x != li[0]]
                            l3 = [min(set(lst), key=lst.count)]
                            l = list(np.unique(li + l3))
                        else:
                            l = list(np.unique(li + lj))
                    # select the parent node values
                    ui1 = v1s[:, i]
                    ui2 = v2s[:, i]
                    uj1 = v1s[:, j]
                    uj2 = v2s[:, j]
                    if set(r).issubset(set(v1i)):
                        uni = 1
                    elif set(r).issubset(set(v2i)):
                        uni = 2
                    elif set(rj).issubset(set(v2i[1:])):
                        uni = 2
                    elif set(rj).issubset(set(v1i[1:])):
                        uni = 1
                    if set(r).issubset(set(v1j)):
                        unj = 1
                    elif set(r).issubset(set(v2j)):
                        unj = 2
                    elif set(ri).issubset(set(v2j[1:])):
                        unj = 2
                    elif set(ri).issubset(set(v1j[1:])):
                        unj = 1

                    # calculate the conditional CDF
                    v1igs = hfunc(copi, ui1, ui2, pari, un=uni)
                    v2jgs = hfunc(copj, uj1, uj2, parj, un=unj)
                    ktau.append(
                        abs(st.kendalltau(v1igs, v2jgs)[0])
                    )  # calculate the absolute kendall tau between v1igs and v2jgs, add it to the ktau list
                    # add the parent node values
                    v1_k.append(v1igs)
                    v2_k.append(v2jgs)

                    del uj1, ui1, ui2, uj2

                    node.append(l + ["g"] + r)  # set node name
                    lk.append(l)  # add left side of node
                    rk.append(r)  # add rigth side of node
            orderk = pd.DataFrame(
                {"v1": v1, "v2": v2, "tauabs": ktau, "node": node, "l": lk, "r": rk}
            )  # put information in dataframe for tree k

            if vine == "R" or vine == "D":
                if (
                    len(orderk) > dimen - k
                ):  # the tree will have the number of variables - k number of rows, if this is exceeded in orderk, a selection has to be made based on kendal tau
                    orderk = orderk.sort_values(
                        by="tauabs", ascending=False
                    )  # sort this dataframe from highest to lowest tauabs
                    indexes = list(
                        orderk.index
                    )  # create a list the original order of the dataframe prior to sorting
                    orderk = orderk.reset_index(drop=True)  # reset the index
                    inde = (
                        []
                    )  #  list to put used rows of orderk in based on their oroginal position in the dataframe
                    inde2 = []  # list to put used rows of orderk i
                    for i in range(len(orderk)):
                        if i == 0:
                            order = orderk.head(
                                1
                            )  # loop through all the rows to include those pairs with the highest ktau first
                            inde.append(
                                indexes[i]
                            )  # add used rows to inde (original row value)
                            l = orderk.l[i]

                        else:
                            if (
                                orderk.v1[i] in list(orderk.v2[:i])
                                or orderk.v1[i] in list(orderk.v1[:i])
                            ) and (
                                orderk.v2[i] in list(orderk.v2[:i])
                                or orderk.v2[i] in list(orderk.v1[:i])
                            ):
                                continue
                            else:
                                inde.append(
                                    indexes[i]
                                )  # add used rows to inde (original row value)
                                inde2.append(i)  # add used rows to inde
                                order = pd.concat(
                                    [order, orderk.loc[i].to_frame().T],
                                    ignore_index=True,
                                )  # if conditions are met, add this to the dataframe of tree k
                                l = l + orderk.l[i]
                    if len(order) < (dimen - k):
                        for i in range(1, len(orderk)):
                            if i in inde2:
                                continue
                            lst = (
                                list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v1[i])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v1[i])
                                    ]
                                )
                                + list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v2[i])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v2[i])
                                    ]
                                )
                            )
                            l1 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v1[i])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v1[i])
                                ]
                            )
                            lk = l1.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v1[i]))
                                    except:
                                        pass

                                    for s in l1:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l1 = l1 + ln
                                    lk = lk + ln

                            l2 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v2[i])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v2[i])
                                ]
                            )
                            lk = l2.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v2[i]))
                                    except:
                                        pass

                                    for s in l2:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l2 = l2 + ln
                                    lk = lk + ln
                            skip = False
                            for val in l1:
                                if val in l2:
                                    skip = True
                                    break
                            if skip == True:
                                continue
                            # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):
                            # continue
                            if len(lst) == len(set(lst)):
                                order = pd.concat(
                                    [order, orderk.loc[i].to_frame().T],
                                    ignore_index=True,
                                )  # if conditions are met, add this to the dataframe of tree k
                                inde.append(
                                    indexes[i]
                                )  # add used rows to inde (original row value)
                            if (
                                len(order) == dimen - k
                            ):  # end loop when desired number of rows (number of variables - k) has been met
                                break
                    orderk = order
                    orderk = orderk.sort_values(
                        by="tauabs", ascending=False
                    )  # sort this dataframe from highest to lowest tauabs

                    v1_k = np.array(
                        [v1_k[ind] for ind in inde]
                    ).T  # sort array of first node
                    v2_k = np.array(
                        [v2_k[ind] for ind in inde]
                    ).T  # sort array of second node
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1
                    for j in range(len(orderk)):
                        u3 = np.vstack(
                            (v1_k[:, j], v2_k[:, j])
                        ).T  # stacking the combination to fit copula to
                        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
                        aics.append(aic)  # add AIC to aics
                        rhos.append(rho)  # add parameters to rhos
                        cops.append(cop)  # add copula to cops

                    # add information to dataframe of tree k
                    orderk["rhos"] = rhos
                    orderk["cop"] = cops
                    orderk["AIC"] = aics
                    locals()["v1_" + str(k)] = v1_k
                    locals()["v2_" + str(k)] = v2_k
                    locals()["order" + str(k)] = orderk

                else:
                    orderk = orderk.sort_values(
                        by="tauabs", ascending=False
                    )  # sort this dataframe from highest to lowest tauabs
                    v1_k = np.array(
                        [v1_k[ind] for ind in orderk.index]
                    ).T  # sort array of first node
                    v2_k = np.array(
                        [v2_k[ind] for ind in orderk.index]
                    ).T  # sort array of second node
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1

                    for j in range(len(orderk)):
                        u3 = np.vstack(
                            (v1_k[:, j], v2_k[:, j])
                        ).T  # stacking the combination to fit copula to
                        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
                        aics.append(aic)  # add AIC to aics
                        rhos.append(rho)  # add parameters to rhos
                        cops.append(cop)  # add copula to cops

                    # add information to dataframe of tree k
                    orderk["rhos"] = rhos
                    orderk["cop"] = cops
                    orderk["AIC"] = aics
                    locals()["v1_" + str(k)] = v1_k
                    locals()["v2_" + str(k)] = v2_k
                    locals()["order" + str(k)] = orderk

            if vine == "C":
                if len(orderk) > dimen - k:
                    subnodes = np.unique(
                        np.stack((np.array(orderk.v1), np.array(orderk.v2)))
                    )  # see all the unique parent nodes
                    taus = (
                        []
                    )  # list to put the sum of all tauabs in for a specific nodes
                    for i in range(len(subnodes)):
                        orderksub = orderk[
                            (orderk["v1"].apply(lambda x: x == subnodes[i]))
                            | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                        ]
                        taus.append(
                            sum(orderksub.tauabs)
                        )  # calculate the sum of all taus for a specific variable
                    i = np.where(np.array(taus) == max(taus))[0][
                        0
                    ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
                    orderk = orderk[
                        (orderk["v1"].apply(lambda x: x == subnodes[i]))
                        | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                    ]  # select rows where this node is included
                    orderk = orderk.sort_values(
                        by="tauabs", ascending=False
                    )  # sort this dataframe from highest to lowest tauabs
                    inde = list(orderk.index)
                    v1_k = np.array(
                        [v1_k[ind] for ind in inde]
                    ).T  # sort array of first node
                    v2_k = np.array(
                        [v2_k[ind] for ind in inde]
                    ).T  # sort array of second node
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1
                    v1_k = v1_k.copy()
                    v2_k = v2_k.copy()
                    for j in range(len(orderk)):
                        u3 = np.vstack(
                            (v1_k[:, j], v2_k[:, j])
                        ).T  # stacking the combination to fit copula to
                        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
                        aics.append(aic)  # add AIC to aics
                        rhos.append(rho)  # add parameters to rhos
                        cops.append(cop)  # add copula to cops

                    # add information to dataframe of tree k
                    orderk["rhos"] = rhos
                    orderk["cop"] = cops
                    orderk["AIC"] = aics
                    locals()["v1_" + str(k)] = v1_k
                    locals()["v2_" + str(k)] = v2_k
                    locals()["order" + str(k)] = orderk

                else:
                    orderk = orderk.sort_values(by="tauabs", ascending=False)
                    v1_k = np.array([v1_k[ind] for ind in orderk.index]).T
                    v2_k = np.array([v2_k[ind] for ind in orderk.index]).T
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1

                    for j in range(len(orderk)):
                        u3 = np.vstack(
                            (v1_k[:, j], v2_k[:, j])
                        ).T  # stacking the combination to fit copula to
                        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
                        aics.append(aic)  # add AIC to aics
                        rhos.append(rho)  # add parameters to rhos
                        cops.append(cop)  # add copula to cops

                    # add information to dataframe of tree k
                    orderk["rhos"] = rhos
                    orderk["cop"] = cops
                    orderk["AIC"] = aics
                    locals()["v1_" + str(k)] = v1_k
                    locals()["v2_" + str(k)] = v2_k
                    locals()["order" + str(k)] = orderk

    order = pd.DataFrame(columns=order1.columns)  # dataframe to add all trees to
    for i in range(1, dimen):
        order = pd.concat([order, locals()["order" + str(i)]]).reset_index(
            drop=True
        )  # adding trees in dataframe

    # create array for the tree structure and copula matrices
    a = np.empty((dimen, dimen))
    c = np.empty((dimen, dimen))
    a[:] = np.nan
    c[:] = np.nan

    order["used"] = 0  # set used to 0 for nodes that have not been used
    for i in list(range(dimen - 1))[
        ::-1
    ]:  # create the matix for the tree structure starting with the last tree
        k1 = sorted(
            np.array(
                order[(order.tree == i) & (order["used"] == 0)].node.iloc[0][:2]
            ).astype(int)
        )[::-1]
        order.loc[(order["tree"] == i) & (order["used"] == 0), "used"] = 1
        t1 = i - 1
        ii = dimen - 2 - i
        a[i : dimen - ii, ii] = k1
        s = k1[-1]
        for j in list(range(0, i))[::-1]:  # continueing from tree i to the first tree
            orde = order[(order.tree == j) & (order["used"] == 0)]
            for k in range(len(orde)):
                arr = np.array(orde.node.iloc[k][:2]).astype(int)
                if np.isin(s, arr) == True:
                    inde = orde.iloc[k].name
                    a[j, ii] = arr[arr != s][0]
                    order["used"][inde] = 1

    a[0, dimen - 1] = a[0, dimen - 2]  # set first sample in sampling order
    orderk = pd.DataFrame(columns=order.columns)
    # create array for the copula parameter matrix
    p = np.empty((dimen, dimen))
    p[:] = np.nan
    p = p.astype(object)
    # fill array p with the parameters and c with the copulas, corresponding to the structure in c
    for i in list(range(dimen - 1)):
        orde = order[order.tree == i]
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            for j in range(len(orde)):
                arr = np.array(orde.node.iloc[j][:2]).astype(int)
                if sum(np.isin(akn, arr)) == 2:
                    orderj = order.loc[[orde.index[j]]]
                    p[i, k] = orderj.rhos.iloc[0]
                    c[i, k] = orderj.cop.iloc[0]
                    if i == 0:
                        orderj.node.iloc[0] = list(akn)
                    else:
                        orderj.node.iloc[0] = (
                            list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1])
                        )
                    orderk = pd.concat([orderk, orderj]).reset_index(drop=True)

    # print the copula structure if print == True
    if printing == True:
        for i in list(range(0, dimen - 1)):
            orde = orderk[orderk.tree == i].reset_index(drop=True)
            print("** Tree: ", i + 1)
            for j in range(len(orde)):
                if i != 0:
                    nodej = ",".join(map(str, orde.node[j])).replace(",|,", "|")
                else:
                    nodej = ",".join(map(str, orde.node[j]))
                print(
                    nodej,
                    " ---> ",
                    copulas[int(orde.cop[j])],
                    ": parameters = ",
                    orde.rhos[j],
                )

    return a, p, c



[docs]
def density_vinecop(u, M, P, C):
    """
    Computes the density function of a vine copula.

    Arguments:
        *u* :  A 2-d numpy array containing the samples for which the PDF will be calculated.

        *M* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

        *P* : Parameters of the bivariate copulae provided as a triangular matrix.

        *C* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).




    Returns:
     *F* :  A 1-d numpy array containing the probability density function of the vine copula

    """
    U = u[:,list(np.diag(M[::-1])[::-1].astype(int))]

    a = M.copy()
    p = P.copy()
    c = C.copy()
    s = len(u)
    Ms = np.flipud(a)  # flip structure matrix
    P = np.flipud(p)  # flip parameter matrix
    C = np.flipud(c)  # flip copula matrix

    replace = {}  # dictionary for relabeling martix
    for i in range(int(max(np.unique(Ms)) + 1)):
        val = max(np.unique(Ms)) - i
        replace.update({Ms[i, i]: val})  # relabel

    Ms = np.nan_to_num(Ms, nan=int(max(np.unique(Ms)) + 1))
    replace_func = np.vectorize(
        lambda x: replace.get(x, x)
    )  # Create a vectorized function for replacement
    M = replace_func(Ms)  # relabel
    M[M == np.max(Ms)] = np.nan
    Mm = M.copy()
    # max matrix
    for i in range(M.shape[0]):
        for k in range(M.shape[0]):
            if k == i:
                continue
            if i == 0:
                continue
            Mm[i, k] = max(Mm[i:, k])

    # Vdirect
    Vdir = np.empty((s, M.shape[0], M.shape[0]))
    Vdir[:] = np.nan
    # Vindirect
    Vindir = np.empty((s, M.shape[0], M.shape[0]))
    Vindir[:] = np.nan
    # Z2
    Z2 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    # Z1
    Z1 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    Vdir[:, -1, :] =  np.flip(U.copy(), 1)
    X = np.flip(U.copy(), 1)
    n = M.shape[0] - 1
    F = 1

    for k in list(reversed((range(0,n)))):
        for i in range(k+1, n+1)[::-1]:

            Z1[:, i, k] = Vdir[:, i, k]
            if M[i, k] == Mm[i, k]:
                Z2[:, i, k] = Vdir[:, i, int(n - Mm[i, k])]
            else:
                Z2[:, i, k] = Vindir[:, i, int(n - Mm[i, k])]
            

            F = F * PDF(int(C[i, k]),np.vstack((Z1[:, i, k], Z2[:, i, k])).T,P[i, k])
            Vdir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=2
            )
            Vindir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=1
            )
    return F


    

# %% fitting vine copula with sspecific structure

[docs]
def fit_vinecopstructure(u1, copsi, a):
    """
    Fit a regular vine copula to data based on a known vine structure matrix.

    Arguments:
        *u1* :  the data, provided as a numpy array where each column contains a seperate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)

        *copsi* : A list of integers referring to the copulae of interest for which the fit has to be evauluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

        *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

    Returns:
     *p* : Parameters of the bivariate copulae provided as a triangular matrix.

     *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

    """

    dimen = a.shape[0]  # number of variables (number of columns)
    order = pd.DataFrame(
        columns=["node", "l", "r", "tree"]
    )  # dataframe for vinecopula information
    s = 0
    for i in list(range(dimen - 1)):
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]  # Edge
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)  # Edge
            if i == 0:
                single_row_values = {
                    "node": list(akn),
                    "l": akn[0],
                    "r": akn[1],
                    "tree": i,
                }
            else:
                single_row_values = {
                    "node": list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1]),
                    "l": list(akn),
                    "r": list((ak.astype(int)[:i])[::-1]),
                    "tree": i,
                }

            order.loc[s] = single_row_values
            s = s + 1

    for t in list(range(dimen - 1)):  # loop through trees
        orderk = order[order.tree == t].reset_index(drop=True)  # select tre
        if t == 0:  # first tree
            orderk["v1"] = orderk.l
            orderk["v2"] = orderk.r
            rhos = []
            v1_1 = []
            v2_1 = []
            cops = []
            tauabs = []

            for j in range(len(orderk)):
                v1i = int(orderk.v1[j])  # first node
                v2i = int(orderk.v2[j])  # second node
                tauabs.append(
                abs(st.kendalltau(u1[:, v1i], u1[:, v2i])[0])
            ) 
                u3 = np.vstack(
                    (u1[:, v1i], u1[:, v2i])
                ).T  # stacking the combination to fit copula to
                cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
                rhos.append(rho)  # add parameters to rhos
                cops.append(cop)  # add copula to cops
                v1_1.append(u1[:, v1i])  # add array of first node
                v2_1.append(u1[:, v2i])  # add array of second node
            v1_1 = np.array(v1_1).T  # v1
            v2_1 = np.array(v2_1).T  # v2

            # add information to dataframe of the first tree
            orderk["rhos"] = rhos
            orderk["cop"] = cops
            orderk["tauabs"] = tauabs
            orderk = orderk.sort_values(by="tauabs", ascending=False)
            v1_1 = v1_1[:, orderk.index]  # sort array of first node
            v2_1 =  v2_1[:, orderk.index] # sort array of second node
            orderk = orderk.reset_index(
        drop=True
    ) 
            
            
            
            
            

        else:

            v1k = []
            v2k = []
            tauabs = []
            for j in range(len(orderk)):
                orderk2 = locals()["order" + str(t)] # Define possible nodes of edge
                l = orderk.l[j] + orderk.r[j]
                subnodes = []
                for k in range(len(orderk2)):
                    subnodes.append(sum(1 for item in orderk2.node[k] if item in l))
                subnodes = np.array(subnodes) == len(l) - 1
                orderk2 = orderk2[subnodes].reset_index(drop=True)
                v1k.append(orderk2.node[0])  # node 1
                v2k.append(orderk2.node[1])  # node 2
            orderk["v1"] = v1k
            orderk["v2"] = v2k
            orderk2 = locals()["order" + str(t)].reset_index(drop=True)
            v1s = locals()["v1_" + str(t)].copy()
            v2s = locals()["v2_" + str(t)].copy()
            v1_k = []
            v2_k = []
            rhos = []
            cops = []

            for k in range(len(orderk)):  # fitting copulas
                r = orderk.r[k]
                nodei = orderk.v1[k]  # nodei
                nodej = orderk.v2[k]  # nodej
                i = orderk2[orderk2["node"].apply(lambda x: x == nodei)].index[0]
                j = orderk2[orderk2["node"].apply(lambda x: x == nodej)].index[0]
                copj = int(orderk2.cop[j])  # copula of nodej
                parj = orderk2.rhos[j]  # parameters of nodej
                copi = int(orderk2.cop[i])  # copula of nodei
                pari = orderk2.rhos[i]  # parameters of nodei
                ri = orderk2.r[i]
                rj = orderk2.r[j]
                v1i = orderk2.v1[i]  # parent node 1 from nodei in tree
                v2i = orderk2.v2[i]
                v1j = orderk2.v1[j]  # parent node 1 from nodej in tree
                v2j = orderk2.v2[j]  # parent node 2 from nodej in tree
                ui1 = v1s[:, i]
                ui2 = v2s[:, i]
                uj1 = v1s[:, j]
                uj2 = v2s[:, j]
                if t > 1:
                    if "g" in v1j:
                        v1j.remove("g")
                        v2j.remove("g")
                        v1i.remove("g")
                        v2i.remove("g")
                    if set(r).issubset(set(v1i)):
                        uni = 1
                    elif set(r).issubset(set(v2i)):
                        uni = 2
                    elif set(rj).issubset(set(v2i[1:])):
                        uni = 2
                    elif set(rj).issubset(set(v1i[1:])):
                        uni = 1
                    if set(r).issubset(set(v1j)):
                        unj = 1
                    elif set(r).issubset(set(v2j)):
                        unj = 2
                    elif set(ri).issubset(set(v2j[1:])):
                        unj = 2
                    elif set(ri).issubset(set(v1j[1:])):
                        unj = 1

                else:
                    lst = orderk2.node[i] + orderk2.node[j]
                    s = max(set(lst), key=lst.count)
                    ui1 = v1_1[:, i]
                    ui2 = v2_1[:, i]
                    uj1 = v1_1[:, j]
                    uj2 = v2_1[:, j]
                    if v1i == s:
                        uni = 1
                        vi1 = v2i
                    else:
                        uni = 2
                        vi1 = v1i
                    if v1j == s:
                        unj = 1
                        vj1 = v2j
                    else:
                        unj = 2
                        vj1 = v1j
                # calculate the conditional CDF
                v1igs = hfunc(copi, ui1, ui2, pari, un=uni)
                v2jgs = hfunc(copj, uj1, uj2, parj, un=unj)
                u3 = np.vstack(
                    (v1igs, v2jgs)
                ).T  # stacking the combination to fit copula to
                cop, rho, aic = bestcop(copsi, u3)  # fit the best copula

                rhos.append(rho)  # add parameters to rhos
                cops.append(cop)  # add copula to cops
                v1_k.append(v1igs)
                v2_k.append(v2jgs)
            orderk["rhos"] = rhos
            orderk["cop"] = cops

            locals()["v1_" + str(t + 1)] = np.array(v1_k).T
            locals()["v2_" + str(t + 1)] = np.array(v2_k).T

        locals()["order" + str(t + 1)] = orderk
    order = pd.DataFrame(columns=orderk.columns)
    for i in range(1, dimen):
        order = pd.concat([order, locals()["order" + str(i)]]).reset_index(drop=True)

    p = np.empty((dimen, dimen))  # parameter array
    p[:] = np.nan
    p = p.astype(object)
    c = np.empty((dimen, dimen))  # copula array
    c[:] = np.nan
    for i in list(range(dimen - 1)):  # fil the copula and parameter array
        orde = order[order.tree == i]
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            for j in range(len(orde)):
                arr = np.array(orde.node.iloc[j][:2]).astype(int)
                if sum(np.isin(akn, arr)) == 2:
                    orderj = order.loc[[orde.index[j]]]
                    p[i, k] = orderj.rhos.iloc[0]
                    c[i, k] = orderj.cop.iloc[0]
    return p, c



# %% Sampling vine copula

[docs]
def sample_vinecop(a, p, c, s):
    """
    Generate random samples from an R-vine.

    Arguments:
        *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

        *p* : Parameters of the bivariate copulae provided as a triangular matrix.

        *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

        *s* : number of samples to generate, provided as a positive scalar integer.



    Returns:
     *X2* :  the randomly sampled data data, provided as a numpy array where each column contains samples of a seperate variable (eg. u1,u2,...,un).

    """
    # Reference: Dißmann et al. 2013
    Ms = np.flipud(a)  # flip structure matrix
    P = np.flipud(p)  # flip parameter matrix
    C = np.flipud(c)  # flip copula matrix
    replace = {}  # dictionary for relabeling martix
    for i in range(int(max(np.unique(Ms)) + 1)):
        val = max(np.unique(Ms)) - i
        # Ms[k,k]
        replace.update({Ms[i, i]: val})  # relabel

    Ms = np.nan_to_num(Ms, nan=int(max(np.unique(Ms)) + 1))
    replace_func = np.vectorize(
        lambda x: replace.get(x, x)
    )  # Create a vectorized function for replacement
    M = replace_func(Ms)  # relabel
    M[M == np.max(Ms)] = np.nan
    Mm = M.copy()
    # max matrix
    for i in range(M.shape[0]):
        for k in range(M.shape[0]):
            if k == i:
                continue
            if i == 0:
                continue
            Mm[i, k] = max(Mm[i:, k])

    # Vdirect
    Vdir = np.empty((s, M.shape[0], M.shape[0]))
    Vdir[:] = np.nan
    # Vindirect
    Vindir = np.empty((s, M.shape[0], M.shape[0]))
    Vindir[:] = np.nan
    # Z2
    Z2 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    # Z1
    Z1 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    U = np.random.uniform(0, 1, (s, M.shape[0]))  # random uniform
    Vdir[:, -1, :] = U.copy()
    X = np.flip(U.copy(), 1)
    n = M.shape[0] - 1
    # sampling algorithm
    for k in range(n)[::-1]:
        for i in range(k + 1, n + 1):
            if M[i, k] == Mm[i, k]:
                Z2[:, i, k] = Vdir[:, i, int(n - Mm[i, k])]
            else:
                Z2[:, i, k] = Vindir[:, i, int(n - Mm[i, k])]
            Vdir[:, n, k] = hfuncinverse(
                int(C[i, k]), Z2[:, i, k], Vdir[:, n, k], P[i, k], un=2
            )
        X[:, int(n - k)] = Vdir[:, n, k]
        for i in range(k + 1, n + 1)[::-1]:
            Z1[:, i, k] = Vdir[:, i, k]
            Vdir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=2
            )
            Vindir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=1
            )

    # Put X in the original order of the data
    replacedf = pd.DataFrame(list(replace.items()), columns=["Original", "Replacement"])
    replacedf = replacedf.sort_values(by="Original")
    X2 = np.array([])
    for i in replacedf.Replacement:
        if len(X2) == 0:
            X2 = X[:, int(i)].reshape(len(X), 1)
        else:
            X2 = np.hstack((X2, X[:, int(i)].reshape(len(X), 1)))
    return X2



# %% Sampling conditonal vine copula

[docs]
def sample_vinecopconditional(a, p, c, s, Xc):
    """
    Generate conditional samples from an R-vine based on a provided sampling order

    Arguments:
         *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

         *p* : Parameters of the bivariate copulae provided as a triangular matrix.

         *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

         *s* : number of samples to generate, provided as a positive scalar integer.

         *XC*: the values of the variables on which the conditional sample has to generated, provided as a 1d array that contains the the values ordered in terms of the sampling order.

    Returns:
     *X2* :  the randomly sampled data data, provided as a numpy array where each column contains samples of a seperate variable (eg. u1,u2,...,un).

    """

    # Reference: Dißmann et al. 2013
    Ms = np.flipud(a)  # flip structure matrix
    P = np.flipud(p)  # flip parameter matrix
    C = np.flipud(c)  # flip copula matrix
    replace = {}  # dictionary for relabeling martix
    for i in range(int(max(np.unique(Ms)) + 1)):
        val = max(np.unique(Ms)) - i
        # Ms[k,k]
        replace.update({Ms[i, i]: val})  # relabel

    Ms = np.nan_to_num(Ms, nan=int(max(np.unique(Ms)) + 1))
    replace_func = np.vectorize(
        lambda x: replace.get(x, x)
    )  # Create a vectorized function for replacement
    M = replace_func(Ms)  # relabel
    M[M == np.max(Ms)] = np.nan
    Mm = M.copy()
    # max matrix
    for i in range(M.shape[0]):
        for k in range(M.shape[0]):
            if k == i:
                continue
            if i == 0:
                continue
            Mm[i, k] = max(Mm[i:, k])

    # Vdirect
    Vdir = np.empty((s, M.shape[0], M.shape[0]))
    Vdir[:] = np.nan
    # Vindirect
    Vindir = np.empty((s, M.shape[0], M.shape[0]))
    Vindir[:] = np.nan
    # Z2
    Z2 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    # Z1
    Z1 = np.empty((s, M.shape[0], M.shape[0]))
    Z2[:] = np.nan
    # combine random uniform data with conditional input
    U = np.hstack(
        (
            np.random.uniform(0.0001, 0.99999, (s, M.shape[0] - len(Xc))),
            np.flip(np.tile(Xc, (s, 1)).copy(), 1),
        )
    )
    Vdir[:, -1, :] = U.copy()
    X = np.flip(U.copy(), 1)
    n = M.shape[0] - 1

    # sampling algorithm
    for k in range(n)[::-1]:
        for i in range(k + 1, n + 1):
            i = int(i)
            if M[i, k] == Mm[i, k]:
                Z2[:, i, k] = Vdir[:, i, int(n - Mm[i, k])]
            else:
                Z2[:, i, k] = Vindir[:, i, int(n - Mm[i, k])]
            if k <= n - len(Xc):
                Vdir[:, n, k] = hfuncinverse(
                    int(C[i, k]), Z2[:, i, k], Vdir[:, n, k], P[i, k], un=2
                )

  
        X[:, int(n - k)] = Vdir[:, n, k]
        for i in range(k + 1, n + 1)[::-1]:
            Z1[:, i, k] = Vdir[:, i, k]
            
            Vdir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=2
            )
            
            Vindir[:, int(i - 1), k] = hfunc(
                int(C[i, k]), Z1[:, i, k], Z2[:, i, k], P[i, k], un=1
            )
           
    
    # Put X in the original order of the data
    replacedf = pd.DataFrame(list(replace.items()), columns=["Original", "Replacement"])
    replacedf = replacedf.sort_values(by="Original")
    X2 = np.array([])
    for i in replacedf.Replacement:
        if len(X2) == 0:
            X2 = X[:, int(i)].reshape(len(X), 1)
        else:
            X2 = np.hstack((X2, X[:, int(i)].reshape(len(X), 1)))
    return X2



# %%sampling orders



[docs]
def samplingorder(a):
    """
    Provides all the different sampling orders that are possible for the fitted vine-copula.

    Arguments:
        *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

        *p* : Parameters of the bivariate copulae provided as a triangular matrix.

        *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula (see...refer to where this information would be)



    Returns:
     *sortingorder* :  A list of the different sampling orders available for the fitted vine-copula

    """

    dimen = a.shape[0]  # dimension of data.
    order = pd.DataFrame(
        columns=["node", "l", "r", "tree"]
    )  # dataframe for vine copula structure.
    s = 0
    for i in list(range(dimen - 1)):
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]  # edges
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            if i == 0:
                single_row_values = {
                    "node": list(akn),
                    "l": akn[0],
                    "r": akn[1],
                    "tree": i,
                }
            else:
                single_row_values = {
                    "node": list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1]),
                    "l": list(akn),
                    "r": list((ak.astype(int)[:i])[::-1]),
                    "tree": i,
                }

            order.loc[s] = single_row_values
            s = s + 1
    combinations = list(
        product([True, False], repeat=dimen - 1)
    )  # all diffferent sampling routes.
    sortingorder = []
    for q in combinations:
        a = np.empty((dimen, dimen))
        a[:] = np.nan
        order["used"] = 0
        for i in list(range(dimen - 1))[::-1]:
            k1 = sorted(
                np.array(
                    order[(order.tree == i) & (order["used"] == 0)].node.iloc[0][:2]
                ).astype(int),
                reverse=q[i],
            )
            order.loc[(order["tree"] == i) & (order["used"] == 0), "used"] = 1
            ii = dimen - 2 - i
            a[i : dimen - ii, ii] = k1
            s = k1[-1]
            for j in list(range(0, i))[::-1]:
                orde = order[(order.tree == j) & (order["used"] == 0)]
                for k in range(len(orde)):
                    arr = np.array(orde.node.iloc[k][:2]).astype(int)
                    if np.isin(s, arr) == True:
                        inde = orde.iloc[k].name
                        a[j, ii] = arr[arr != s][0]
                        order["used"][inde] = 1
        a[0, dimen - 1] = a[0, dimen - 2]
        sortingorder.append(list(np.diag(a[::-1])[::-1]))  # add unique sampling orders
    return sortingorder



# %%matrices of specific sampling order



[docs]
def samplingmatrix(a, c, p, sorder):
    """
    Provides the triangular matrices for which the samples can be generated based on the specific sampling order.

    Arguments:
        *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer rffers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

        *p* : Parameters of the bivariate copulae provided as a triangular matrix.

        *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

        *sorder* :  A list containing the specific sampling order of interest

    Returns:
     *ai* : The vine tree structure, based on the sampling order, provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

     *pi* : Parameters of the bivariate copulae, based on the sampling order,  provided as a triangular matrix.

     *ci* : The types of the bivariate copulae, based on the sampling order,  provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

    """

    dimen = a.shape[0]  # dimension of data
    order = pd.DataFrame(
        columns=["node", "par", "cop", "l", "r", "tree"]
    )  # dataframe for vine copula structure
    s = 0
    for i in list(range(dimen - 1)):
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            if i == 0:
                single_row_values = {
                    "node": list(akn),
                    "par": p[i, k],
                    "cop": int(c[i, k]),
                    "l": akn[0],
                    "r": akn[1],
                    "tree": i,
                }
            else:
                single_row_values = {
                    "node": list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1]),
                    "par": p[i, k],
                    "cop": int(c[i, k]),
                    "l": list(akn),
                    "r": list((ak.astype(int)[:i])[::-1]),
                    "tree": i,
                }

            order.loc[s] = single_row_values
            s = s + 1
    ai = np.empty((dimen, dimen))  # array for vine copula structure
    ai[:] = np.nan
    order["used"] = 0
    ci = np.empty((dimen, dimen))  # array for vine copula copulas
    ci[:] = np.nan
    pi = np.empty((dimen, dimen))  # array for parameters
    pi[:] = np.nan
    pi = pi.astype(object)
    for i in list(range(dimen - 1))[::-1]:
        if i == 0:
            k1 = order[(order.tree == i) & (order["used"] == 0)].node.iloc[0]
        else:
            k1 = order[(order.tree == i) & (order["used"] == 0)].l.iloc[0]

        if sorder[i + 1] == max(k1):
            k1 = sorted(k1, reverse=False)
        else:
            k1 = sorted(k1, reverse=True)
        ii = dimen - 2 - i
        ai[i : dimen - ii, ii] = k1
        ci[i, ii] = order[(order.tree == i) & (order["used"] == 0)].cop.iloc[0]
        pi[i, ii] = order[(order.tree == i) & (order["used"] == 0)].par.iloc[0]
        order.loc[(order["tree"] == i) & (order["used"] == 0), "used"] = 1
        s = k1[-1]
        for j in list(range(0, i))[::-1]:
            orde = order[(order.tree == j) & (order["used"] == 0)]
            for k in range(len(orde)):
                arr = np.array(orde.node.iloc[k][:2]).astype(int)
                if np.isin(s, arr) == True:
                    inde = orde.iloc[k].name
                    ai[j, ii] = arr[arr != s][0]
                    ci[j, ii] = order["cop"][inde]
                    pi[j, ii] = order["par"][inde]
                    order["used"][inde] = 1
    ai[0, dimen - 1] = ai[0, dimen - 2]
    return ai, pi, ci



# %%

[docs]
def fit_conditionalvine(u1, vint, copsi, vine="R", condition=1, printing=True):
    """
    Fit a regular vine copula which allows for a conditional sample of a variable of interest.

    Arguments:
        *u1* :  the data, provided as a numpy array where each column contains a seperate variable (eg. u1,u2,...,un), which have already been transferred to standard uniform margins (0<= u <= 1)

        *vint* : the variables of interest, provided as an integere or list that refers to the variables column numbers in u1, where the first column is 0 and the second column is 1, etc.

        *copsi* : A list of integers referring to the copulae of interest for which the fit has to be evauluated in the vine copula. eg. a list of [1, 10] refers to the Gaussian and Frank copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

        *vine* : The type of vine copula that needs to be fit, either 'R', 'D', or 'C'

        *condition* : condition = 1 indicates that vint needs to be the conditionalized variables (at the end of the sampling order), condition = 2 indicates that vint need to be the conditioning variables (at the start of the sampling order)

        *printing*: True if the fitted copula should be printed and False if not
     Returns:
      *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

      *p* : Parameters of the bivariate copulae provided as a triangular matrix.

      *c* : The types of the bivariate copulae provided as a triangular matrix, composed of integers referring to the copulae with the best fit. eg. a 1 refers to the gaussian copula  (see `Table 1 <https://vinecopulas.readthedocs.io/en/latest/vinecopulas.html#Fitting-a-Vine-Copula>`__).

    """

    # Reference: Dißmann et al. 2013 adapted
    dimen = u1.shape[1]  # number of variables (number of columns)
    if condition == 1:  # if vint is the conditionalized variable
        X = vint
        try:  # if X is an integer put it in a list
            len(X)

        except:
            X = [X]
        Y = list(range(dimen))
        for i in X:
            Y.remove(i)  # Define Y
    elif condition == 2:  # if vint is the conditioning variable
        Y = vint
        try:  # if Y is an integer put it in a list
            len(Y)

        except:
            Y = [Y]
        X = list(range(dimen))
        for i in Y:
            X.remove(i)  # Define X
    v1 = []  # list for variable 1
    v2 = []  # list for  variable 2
    tauabs = []  # list for the absolute kendal tau between v1 and v2

    for i in range(dimen - 1):
        for j in range(i + 1, dimen):
            v1.append(int(i))  # add variable to v1
            v2.append(int(j))  # add variable to v2
            tauabs.append(
                abs(st.kendalltau(u1[:, i], u1[:, j])[0])
            )  # calculate the absolute kendall tau between v1 and v2 and add it to the taubs list

    order1 = pd.DataFrame(
        {"v1": v1, "v2": v2, "tauabs": tauabs}
    )  # put the v1, v2, and tauabs list into a dataframe for the first tree
    order1 = order1.sort_values(by="tauabs", ascending=False).reset_index(
        drop=True
    )  # sort this dataframe from highest to lowest tauabs
    Y2 = Y.copy()  # create copies of Y
    X2 = X.copy()  # create copies of X
    if vine == "R":
        # first find the pairs with the highest kendall tau using variables in Y
        if len(Y) > 1:
            for j in range(len(Y) - 1):
                # define all rows in order1 where only variables in Y are included
                for z in range(j + 1, len(Y)):
                    if z == 1:
                        l = ((order1.v1 == Y[j]) & (order1.v2 == Y[z])) | (
                            (order1.v1 == Y[z]) & (order1.v2 == Y[j])
                        )
                    else:
                        l = (
                            l
                            | ((order1.v1 == Y[j]) & (order1.v2 == Y[z]))
                            | ((order1.v1 == Y[z]) & (order1.v2 == Y[j]))
                        )
            inde = []  # list to put used rows of order1 in
            l = np.where(l == True)[0]
            # loop through the rows with variables in Y
            for k in l:
                if k == l[0]:  # add first one to the tree
                    order2 = order1.loc[k].to_frame().T
                    inde.append(k)  # add used rows to inde
                if k in inde:  # check if row has not been added before
                    continue

                lst = (
                    list(order2.v2[order2.v1 == order1.v2[k]])
                    + list(order2.v1[order2.v2 == order1.v2[k]])
                    + list(order2.v2[order2.v1 == order1.v1[k]])
                    + list(order2.v1[order2.v2 == order1.v1[k]])
                )
                l1 = list(order2.v2[order2.v1 == order1.v2[k]]) + list(
                    order2.v1[order2.v2 == order1.v2[k]]
                )
                l2 = list(order2.v2[order2.v1 == order1.v1[k]]) + list(
                    order2.v1[order2.v2 == order1.v1[k]]
                )
                lk = l1.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v2[k])
                        except:
                            pass

                        for s in l1:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l1 = l1 + ln
                        lk = lk + ln

                lk = l2.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v1[k])
                        except:
                            pass

                        for s in l2:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l2 = l2 + ln
                        lk = lk + ln
                skip = False
                for val in l1:
                    if val in l2:
                        skip = True
                        break
                if skip == True:
                    continue
                if len(lst) == len(set(lst)):
                    order2 = pd.concat(
                        [order2, order1.loc[k].to_frame().T], ignore_index=True
                    )
                    inde.append(k)
                    if (
                        len(order2) == len(Y) - 1
                    ):  # end loop when desired number of rows (number of variables in Y - 1) has been met
                        break

        else:  # if there is only one value in Y
            for z in X:
                # define all rows in order1 where variables in X are connected to variabels in Y
                if z == X[0]:
                    l = ((order1.v1 == Y[0]) & (order1.v2 == z)) | (
                        (order1.v1 == z) & (order1.v2 == Y[0])
                    )
                else:
                    l = (
                        l
                        | ((order1.v1 == Y[0]) & (order1.v2 == z))
                        | ((order1.v1 == z) & (order1.v2 == Y[0]))
                    )
            inde = []  # list to put used rows of order1 in
            l = np.where(l == True)[0]
            order2 = order1.loc[l[0]].to_frame().T  # add the first row to order 2
            inde.append(l[0])  # add used rows to inde
            # remove used variables in X from X and add them to Y
            if order2.v1.iloc[-1] in X2:
                Y2.append(int(order2.v1.iloc[-1]))
                X2.remove(int(order2.v1.iloc[-1]))
            else:
                Y2.append(int(order2.v2.iloc[-1]))
                X2.remove(int(order2.v2.iloc[-1]))

        while len(order2) < (dimen - 1):
            # Here we ensure that the last remaining X variable will be connected either to another original X variable, or a variable that is connected to an original X value
            if len(order2) == dimen - 2 and len(X) > 1:
                X3 = X.copy()
                X3.remove(X2[0])
                Y3 = []
                for i in X3:
                    Y3 = (
                        Y3
                        + list(order2.v1[order2.v2 == i])
                        + list(order2.v2[order2.v1 == i])
                        + [i]
                    )

                Y3 = list(np.unique(Y3))
                for j in Y3:
                    for z in X2:
                        if j == Y3[0] and z == X2[0]:
                            l = ((order1.v1 == j) & (order1.v2 == z)) | (
                                (order1.v1 == z) & (order1.v2 == j)
                            )
                        else:
                            l = (
                                l
                                | ((order1.v1 == j) & (order1.v2 == z))
                                | ((order1.v1 == z) & (order1.v2 == j))
                            )

                l = np.where(l == True)[0]

            else:
                # define all rows in order1 where variables in X are connected to variabels in Y
                for j in Y2:
                    for z in X2:
                        if j == Y2[0] and z == X2[0]:
                            l = ((order1.v1 == j) & (order1.v2 == z)) | (
                                (order1.v1 == z) & (order1.v2 == j)
                            )
                        else:
                            l = (
                                l
                                | ((order1.v1 == j) & (order1.v2 == z))
                                | ((order1.v1 == z) & (order1.v2 == j))
                            )

                l = np.where(l == True)[0]
            for k in l:
                lst = (
                    list(order2.v2[order2.v1 == order1.v2[k]])
                    + list(order2.v1[order2.v2 == order1.v2[k]])
                    + list(order2.v2[order2.v1 == order1.v1[k]])
                    + list(order2.v1[order2.v2 == order1.v1[k]])
                )
                l1 = list(order2.v2[order2.v1 == order1.v2[k]]) + list(
                    order2.v1[order2.v2 == order1.v2[k]]
                )
                l2 = list(order2.v2[order2.v1 == order1.v1[k]]) + list(
                    order2.v1[order2.v2 == order1.v1[k]]
                )
                lk = l1.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v2[k])
                        except:
                            pass

                        for s in l1:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l1 = l1 + ln
                        lk = lk + ln

                lk = l2.copy()
                while len(lk) > 0:
                    lk2 = lk.copy()
                    lk = []
                    for j in lk2:
                        ln = list(order2.v2[order2.v1 == j]) + list(
                            order2.v1[order2.v2 == j]
                        )
                        try:
                            ln.remove(order1.v1[k])
                        except:
                            pass

                        for s in l2:
                            try:
                                ln.remove(s)
                            except:
                                pass
                        l2 = l2 + ln
                        lk = lk + ln
                skip = False
                for val in l1:
                    if val in l2:
                        skip = True
                        break
                if skip == True:
                    continue
                # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):
                # continue
                if order1.v1[k] in X and order1.v2[k] in X:
                    skip = False
                    if order1.v2[k] in Y2:

                        numapear = sum(
                            (
                                (order2["v1"].apply(lambda x: order1.v2[k] == x))
                                | (order2["v2"].apply(lambda x: order1.v2[k] == x))
                            )
                        )
                        for i in Y:
                            if numapear == sum(
                                (
                                    (order2["v1"].apply(lambda x: i == x))
                                    | (order2["v2"].apply(lambda x: i == x))
                                )
                            ):
                                skip = True
                                break
                    elif order1.v1[k] in Y2:

                        numapear = sum(
                            (
                                (order2["v1"].apply(lambda x: order1.v1[k] == x))
                                | (order2["v2"].apply(lambda x: order1.v1[k] == x))
                            )
                        )
                        for i in Y:
                            if numapear == sum(
                                (
                                    (order2["v1"].apply(lambda x: i == x))
                                    | (order2["v2"].apply(lambda x: i == x))
                                )
                            ):
                                skip = True
                                break
                    if skip == True:
                        continue
                if len(lst) == len(set(lst)):
                    order2 = pd.concat(
                        [order2, order1.loc[k].to_frame().T], ignore_index=True
                    )
                    inde.append(k)  # add used rows to inde
                    # remove used variables in X from X and add them to Y
                    if order2.v1.iloc[-1] in X2:
                        Y2.append(int(order2.v1.iloc[-1]))
                        X2.remove(int(order2.v1.iloc[-1]))
                    else:
                        Y2.append(int(order2.v2.iloc[-1]))
                        X2.remove(int(order2.v2.iloc[-1]))

                    break
    elif vine == "D":
        if len(Y) > 1:
            inde = []  # list to put used rows of order1 in
            for j in range(len(Y) - 1):
                # define all rows in order1 where only variables in Y are included
                for z in range(j + 1, len(Y)):
                    if z == 1:
                        l = ((order1.v1 == Y[j]) & (order1.v2 == Y[z])) | (
                            (order1.v1 == Y[z]) & (order1.v2 == Y[j])
                        )
                    else:
                        l = (
                            l
                            | ((order1.v1 == Y[j]) & (order1.v2 == Y[z]))
                            | ((order1.v1 == Y[z]) & (order1.v2 == Y[j]))
                        )
            l2 = list(order1[~l].index)
            l = np.where(l == True)[0]
            order2 = order1.loc[l[0]].to_frame().T.reset_index(drop=True)
            vi = order2.v1[0]  # select the first variable included in this row
            vj = order2.v2[0]  # select the second variable included in this row
            while len(order2) < len(Y) - 1:
                for i in l:
                    if (
                        order1.v1[i] == vj
                        or order1.v2[i] == vi
                        or order1.v1[i] == vi
                        or order1.v2[i] == vj
                    ):  # find the rows which include at least one of the two variables from the previous edge
                        if (
                            order1.v1[i] in inde or order1.v2[i] in inde
                        ):  # check if variables have already been connected to other variables twice
                            continue
                        if (order1.v1[i] == vi and order1.v2[i] == vj) or (
                            order1.v1[i] == vj and order1.v2[i] == vi
                        ):  # skip rows that have already been used
                            continue

                        else:
                            order2 = pd.concat(
                                [order2, order1.loc[i].to_frame().T], ignore_index=True
                            )  # if conditions are met, add this to the dataframe of the first tree
                            if (
                                order1.v1[i] == vj or order1.v2[i] == vj
                            ):  # check which value has been used twice already
                                inde.append(vj)  # add this value to inde
                                if order1.v1[i] == vj:
                                    vj = order1.v2[i]  # select the new edge to connect to
                                else:
                                    vj = order1.v1[i]  # select the new edge to connect to
                                break
                            elif (
                                order1.v1[i] == vi or order1.v2[i] == vi
                            ):  # check which value has been used twice already
                                inde.append(vi)  # add this value to inde
                                if order1.v1[i] == vi:
                                    vi = order1.v2[i]  # select the new edge to connect to
                                else:
                                    vi = order1.v1[i]  # select the new edge to connect to
                                break
            while len(order2) < u1.shape[1] -1:
                for i in l2:
                    if (
                        order1.v1[i] == vj
                        or order1.v2[i] == vi
                        or order1.v1[i] == vi
                        or order1.v2[i] == vj
                    ):  # find the rows which include at least one of the two variables from the previous edge
                        if (
                            order1.v1[i] in inde or order1.v2[i] in inde
                        ):  # check if variables have already been connected to other variables twice
                            continue
                        if (order1.v1[i] == vi and order1.v2[i] == vj) or (
                            order1.v1[i] == vj and order1.v2[i] == vi
                        ):  # skip rows that have already been used
                            continue

                        else:
                            order2 = pd.concat(
                                [order2, order1.loc[i].to_frame().T], ignore_index=True
                            )  # if conditions are met, add this to the dataframe of the first tree
                            if (
                                order1.v1[i] == vj or order1.v2[i] == vj
                            ):  # check which value has been used twice already
                                inde.append(vj)  # add this value to inde
                                if order1.v1[i] == vj:
                                    vj = order1.v2[i]  # select the new edge to connect to
                                else:
                                    vj = order1.v1[i]  # select the new edge to connect to
                            elif (
                                order1.v1[i] == vi or order1.v2[i] == vi
                            ):  # check which value has been used twice already
                                inde.append(vi)  # add this value to inde
                                if order1.v1[i] == vi:
                                    vi = order1.v2[i]  # select the new edge to connect to
                                else:
                                    vi = order1.v1[i]  # select the new edge to connect to
            
        else:
            inde = []  # lsit to put used variables in
            for j in range(dimen - 1):
                if j == 0:
                    order2 = order1.head(
                        1
                    )  # loop through all the rows to include those pairs with the highest ktau first
                    vi = order2.v1[0]  # select the first variable included in this row
                    vj = order2.v2[0]  # select the second variable included in this row
                else:
                    for i in range(len(order1)):
                        if (
                            order1.v1[i] == vj
                            or order1.v2[i] == vi
                            or order1.v1[i] == vi
                            or order1.v2[i] == vj
                        ):  # find the rows which include at least one of the two variables from the previous edge
                            if (
                                order1.v1[i] in inde or order1.v2[i] in inde
                            ):  # check if variables have already been connected to other variables twice
                                continue
                            if (order1.v1[i] == vi and order1.v2[i] == vj) or (
                                order1.v1[i] == vj and order1.v2[i] == vi
                            ):  # skip rows that have already been used
                                continue
                            else:
                                order2 = pd.concat(
                                    [order2, order1.loc[i].to_frame().T],
                                    ignore_index=True,
                                )  # if conditions are met, add this to the dataframe of the first tree
                                if (
                                    order1.v1[i] == vj or order1.v2[i] == vj
                                ):  # check which value has been used twice already
                                    inde.append(vj)  # add this value to inde
                                    if order1.v1[i] == vj:
                                        vj = order1.v2[
                                            i
                                        ]  # select the new edge to connect to
                                    else:
                                        vj = order1.v1[
                                            i
                                        ]  # select the new edge to connect to
                                elif (
                                    order1.v1[i] == vi or order1.v2[i] == vi
                                ):  # check which value has been used twice already
                                    inde.append(vi)  # add this value to inde
                                    if order1.v1[i] == vi:
                                        vi = order1.v2[
                                            i
                                        ]  # select the new edge to connect to
                                    else:
                                        vi = order1.v1[
                                            i
                                        ]  # select the new edge to connect to
    elif vine == "C":
        taus = []  # list to put the sum of all tauabs in for a specific variable
        for i in Y:
            taus.append(
                sum(order1[(order1.v1 == i) | (order1.v2 == i)].tauabs)
            )  # calculate the sum of all taus for a specific variable
        i = Y[
            np.where(np.array(taus) == max(taus))[0][0]
        ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
        order2 = order1[(order1.v1 == i) | (order1.v2 == i)].reset_index(
            drop=True
        )  # select the rows that include this variable
        order2["both_in_Y"] = order2.apply(
            lambda row: (row["v1"] in Y) and (row["v2"] in Y), axis=1
        )  # find the columns with values in Y
        order2 = order2.sort_values(by="both_in_Y", ascending=False).reset_index(
            drop=True
        )  # Sort the DataFrame by the 'both_in_Y' column
        order2 = order2.drop(columns=["both_in_Y"])  # delete this column

    order1 = order2  # make order1 == the first tree
    del order2
    rhos = []  # list for the rhos
    node = []  # list for the nodes
    cops = []  # list for the copulas
    v1_1 = []  # list for the 1st nodes
    v2_1 = []  # list for the 2nd nodes
    aics = []  # list for the AIC's
    for i in range(len(order1)):
        v1i = int(order1.v1[i])  # first node
        v2i = int(order1.v2[i])  # second node
        u3 = np.vstack(
            (u1[:, v1i], u1[:, v2i])
        ).T  # stacking the combination to fit copula to
        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
        aics.append(aic)  # add AIC to aics
        rhos.append(rho)  # add parameters to rhos
        cops.append(cop)  # add copula to cops
        node.append([v1i, v2i])  # create final node
        v1_1.append(u1[:, v1i])  # add array of first node
        v2_1.append(u1[:, v2i])  # add array of second node
    v1_1 = np.array(v1_1).T  # v1
    v2_1 = np.array(v2_1).T  # v2

    # add information to dataframe of the first tree
    order1["rhos"] = rhos
    order1["node"] = node
    order1["tree"] = 0
    order1["cop"] = cops
    order1["AIC"] = aics

    # set up variables for the second tree
    v1 = []
    v2 = []
    ktau = []
    rhos = []
    node = []
    cops = []
    v1_k = []
    v2_k = []
    aics = []
    for i in range(
        len(order1) - 1
    ):  # loop through the first tree to identify all possible combination of nodes
        v1i = int(order1.v1[i])  # parent node 1 from nodei in tree
        v2i = int(order1.v2[i])  # parent node 2 from nodei in tree
        copi = int(order1.cop[i])  # copula of nodei
        pari = order1.rhos[i]  # parameters of nodei
        for j in (
            np.where(
                np.array([item == v1i for item in list(order1.v1[i + 1 :])])
                | np.array([item == v1i for item in list(order1.v2[i + 1 :])])
                | np.array([item == v2i for item in list(order1.v1[i + 1 :])])
                | np.array([item == v2i for item in list(order1.v2[i + 1 :])])
            )[0]
            + i
            + 1
        ):  # see if possible connection between nodei and nodej
            v1j = int(order1.v1[j])  # parent node 1 from nodej in tree
            v2j = int(order1.v2[j])  # parent node 2 from nodej in tree
            copj = int(order1.cop[j])  # copula of nodei
            parj = order1.rhos[j]  # parameters of nodei
            v1.append(order1.node[i])  # parent node for next tree
            v2.append(order1.node[j])  # parent node for next tree
            lst = order1.node[i] + order1.node[j]  # list of all variables in node
            s = max(
                set(lst), key=lst.count
            )  # variable that is common in both parent nodes
            # parent node values
            ui1 = v1_1[:, i]
            ui2 = v2_1[:, i]
            uj1 = v1_1[:, j]
            uj2 = v2_1[:, j]
            # defining which parent node the conditional CDF needs to be based on
            if v1i == s:
                uni = 1
                vi1 = v2i
            else:
                uni = 2
                vi1 = v1i
            if v1j == s:
                unj = 1
                vj1 = v2j
            else:
                unj = 2
                vj1 = v1j
            # calculate the conditional CDF
            v1igs = hfunc(copi, ui1, ui2, pari, un=uni)
            v2jgs = hfunc(copj, uj1, uj2, parj, un=unj)
            ktau.append(
                abs(st.kendalltau(v1igs, v2jgs)[0])
            )  # calculate the absolute kendall tau between v1igs and v2jgs, add it to the ktau list
            # add the parent node values
            v1_k.append(v1igs)
            v2_k.append(v2jgs)
            # format the final node
            node.append([vi1, vj1, "g", s])

    k = 2  # second tree

    orderk = pd.DataFrame(
        {"v1": v1, "v2": v2, "tauabs": ktau, "node": node}
    )  # put the v1, v2, tauabs, and the node in list into a dataframe for tree k

    if vine == "R":
        if len(orderk) > dimen - k:
            sorder2 = order1.node  # get all the unique parent nodes
            # divide the unique nodes in Y and X
            if len(Y) - 1 > 0:
                Y2 = list(sorder2[: len(Y) - 1].reset_index(drop=True))
                X2 = list(sorder2[len(Y) - 1 :].reset_index(drop=True))
            else:
                Y2 = list(sorder2[:1].reset_index(drop=True))
                X2 = list(sorder2[1:].reset_index(drop=True))
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            indexi = list(
                orderk.index
            )  # create a list the original order of the dataframe prior to sorting
            orderk = orderk.reset_index(drop=True)  # reset the index
            inde = []  # list to put used rows of orderk i
            inde2 = (
                []
            )  #  list to put used rows of orderk in based on their oroginal position in the dataframe
            Y3 = Y2.copy()
            X3 = X2.copy()
            if len(Y2) == 1:  # if there is only one node in Y
                # define all rows in orderk where the nodes in X are connected to nodes in Y
                for j in Y2:
                    for z in X2:
                        if j == Y2[0] and z == X2[0]:
                            l = (
                                (orderk["v1"].apply(lambda x: j == x))
                                & (orderk["v2"].apply(lambda x: z == x))
                            ) | (
                                (orderk["v1"].apply(lambda x: z == x))
                                & (orderk["v2"].apply(lambda x: j == x))
                            )
                        else:
                            l = (
                                l
                                | (
                                    (orderk["v1"].apply(lambda x: j == x))
                                    & (orderk["v2"].apply(lambda x: z == x))
                                )
                                | (
                                    (orderk["v1"].apply(lambda x: z == x))
                                    & (orderk["v2"].apply(lambda x: j == x))
                                )
                            )

                l = np.where(l == True)[0]
                order = orderk.loc[l[0]].to_frame().T
                inde.append(l[0])
                inde2.append(indexi[l[0]])  # add used rows to inde (original row value)
                # remove used nodes in X from X and add them to Y
                if order.v1.iloc[-1] in X3:

                    X3.remove(order.v1.iloc[-1])
                    Y3.append(order.v1.iloc[-1])
                else:
                    X3.remove(order.v2.iloc[-1])
                    Y3.append(order.v2.iloc[-1])
            else:
                # first find the pairs with the highest kendall tau using nodes in Y
                for j in range(len(Y2) - 1):
                    # define all rows in orderk where only nodes in Y are included
                    for z in range(j + 1, len(Y2)):
                        if z == 1:
                            l = (
                                (orderk["v1"].apply(lambda x: Y2[j] == x))
                                & (orderk["v2"].apply(lambda x: Y2[z] == x))
                            ) | (
                                (orderk["v1"].apply(lambda x: Y2[z] == x))
                                & (orderk["v2"].apply(lambda x: Y2[j] == x))
                            )
                        else:
                            l = (
                                l
                                | (
                                    (orderk["v1"].apply(lambda x: Y2[j] == x))
                                    & (orderk["v2"].apply(lambda x: Y2[z] == x))
                                )
                                | (
                                    (orderk["v1"].apply(lambda x: Y2[z] == x))
                                    & (orderk["v2"].apply(lambda x: Y2[j] == x))
                                )
                            )

                l = np.where(l == True)[0]
                # loop through the rows with nodes in Y
                for k in l:
                    if k == l[0]:  # add first one to the tree
                        order = orderk.loc[k].to_frame().T
                        inde.append(k)
                        inde2.append(
                            indexi[k]
                        )  # add used rows to inde (original row value)
                    if k in inde:  # check if row has not been added before
                        continue
                    lst = (
                        list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v1[k])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v1[k])
                            ]
                        )
                        + list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v2[k])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v2[k])
                            ]
                        )
                    )
                    l1 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v1[k])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v1[k])]
                    )
                    lk = l1.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v1[k]))
                            except:
                                pass

                            for s in l1:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l1 = l1 + ln
                            lk = lk + ln

                    l2 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v2[k])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v2[k])]
                    )
                    lk = l2.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v2[k]))
                            except:
                                pass

                            for s in l2:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l2 = l2 + ln
                            lk = lk + ln
                    skip = False
                    for val in l1:
                        if val in l2:
                            skip = True
                            break
                    if skip == True:
                        continue
                    # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):  # ensure that there are no groups of variables that all have a connection with one another
                    # continue
                    if len(lst) == len(set(lst)):
                        order = pd.concat(
                            [order, orderk.loc[k].to_frame().T], ignore_index=True
                        )
                        inde.append(k)
                        inde2.append(indexi[k])
                        if (
                            len(order) == len(Y2) - 1
                        ):  # end loop when desired number of rows (number of variables in Y - 1) has been met
                            break

            while len(order) < (dimen - 2):
                # define all rows in orderk where nodes in X are connected to nodes in Y
                for j in Y3:
                    for z in X3:
                        if j == Y3[0] and z == X3[0]:
                            l = (
                                (orderk["v1"].apply(lambda x: j == x))
                                & (orderk["v2"].apply(lambda x: z == x))
                            ) | (
                                (orderk["v1"].apply(lambda x: z == x))
                                & (orderk["v2"].apply(lambda x: j == x))
                            )
                        else:
                            l = (
                                l
                                | (
                                    (orderk["v1"].apply(lambda x: j == x))
                                    & (orderk["v2"].apply(lambda x: z == x))
                                )
                                | (
                                    (orderk["v1"].apply(lambda x: z == x))
                                    & (orderk["v2"].apply(lambda x: j == x))
                                )
                            )

                l = np.where(l == True)[0]
                for k in l:
                    if k in inde:
                        continue
                    lst = (
                        list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v1[k])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v1[k])
                            ]
                        )
                        + list(
                            order.v2.astype(str)[
                                order.v1.astype(str) == str(orderk.v2[k])
                            ]
                        )
                        + list(
                            order.v1.astype(str)[
                                order.v2.astype(str) == str(orderk.v2[k])
                            ]
                        )
                    )
                    l1 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v1[k])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v1[k])]
                    )
                    lk = l1.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v1[k]))
                            except:
                                pass

                            for s in l1:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l1 = l1 + ln
                            lk = lk + ln

                    l2 = list(
                        order.v2.astype(str)[order.v1.astype(str) == str(orderk.v2[k])]
                    ) + list(
                        order.v1.astype(str)[order.v2.astype(str) == str(orderk.v2[k])]
                    )
                    lk = l2.copy()
                    while len(lk) > 0:
                        lk2 = lk.copy()
                        lk = []
                        for j in lk2:
                            ln = list(
                                order.v2.astype(str)[order.v1.astype(str) == j]
                            ) + list(order.v1.astype(str)[order.v2.astype(str) == j])
                            try:
                                ln.remove(str(orderk.v2[k]))
                            except:
                                pass

                            for s in l2:
                                try:
                                    ln.remove(s)
                                except:
                                    pass
                            l2 = l2 + ln
                            lk = lk + ln
                    skip = False
                    for val in l1:
                        if val in l2:
                            skip = True
                            break
                    if skip == True:
                        continue
                    # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):
                    # continue
                    if len(lst) == len(set(lst)):
                        order = pd.concat(
                            [order, orderk.loc[k].to_frame().T], ignore_index=True
                        )
                        inde.append(k)
                        inde2.append(indexi[k])
                        # remove used variables in X from X and add them to Y
                        if order.v1.iloc[-1] in X3:

                            X3.remove(order.v1.iloc[-1])
                            Y3.append(order.v1.iloc[-1])
                            # X2.remove(int(order2.v1.iloc[-1]))
                        else:
                            X3.remove(order.v2.iloc[-1])
                            Y3.append(order.v2.iloc[-1])

                        break
            orderk = order
            k = 2
            # orderk = orderk.sort_values(by='tauabs', ascending=False)
            v1_k = np.array([v1_k[ind] for ind in inde]).T  # sort array of first node
            v2_k = np.array([v2_k[ind] for ind in inde]).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()
        else:
            orderk = orderk.sort_values(
                by="tauabs", ascending=False
            )  # sort this dataframe from highest to lowest tauabs
            v1_k = np.array(
                [v1_k[ind] for ind in orderk.index]
            ).T  # sort array of first node
            v2_k = np.array(
                [v2_k[ind] for ind in orderk.index]
            ).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()

    elif vine == "D":
        orderk = orderk.sort_values(
            by="tauabs", ascending=False
        )  # sort this dataframe from highest to lowest tauabs
        v1_k = np.array(
            [v1_k[ind] for ind in orderk.index]
        ).T  # sort array of first node
        v2_k = np.array(
            [v2_k[ind] for ind in orderk.index]
        ).T  # sort array of second node
        orderk = orderk.reset_index(drop=True)
        orderk["tree"] = k - 1
        v1_2 = v1_k.copy()
        v2_2 = v2_k.copy()
        order2 = orderk.copy()

    elif vine == "C":
        if len(orderk) > dimen - k:
            subnodes = order1[
                ["v1", "v2"]
            ].values.tolist()  # see all the unique parent nodes
            taus = []  # list to put the sum of all tauabs in for a specific nodes
            for i in range(len(Y) - 1):
                orderksub = orderk[
                    (orderk["v1"].apply(lambda x: x == subnodes[i]))
                    | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                ]
                taus.append(
                    sum(orderksub.tauabs)
                )  # calculate the sum of all taus for a specific variable
            i = np.where(np.array(taus) == max(taus))[0][
                0
            ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
            orderk = orderk[
                (orderk["v1"].apply(lambda x: x == subnodes[i]))
                | (orderk["v2"].apply(lambda x: x == subnodes[i]))
            ]  # select rows where this node is included
            inde = list(orderk.index)
            v1_k = np.array([v1_k[ind] for ind in inde]).T  # sort array of first node
            v2_k = np.array([v2_k[ind] for ind in inde]).T  # sort array of second node
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()
        else:
            orderk = orderk.sort_values(by="tauabs", ascending=False)
            v1_k = np.array([v1_k[ind] for ind in orderk.index]).T
            v2_k = np.array([v2_k[ind] for ind in orderk.index]).T
            orderk = orderk.reset_index(drop=True)
            orderk["tree"] = k - 1
            v1_2 = v1_k.copy()
            v2_2 = v2_k.copy()
            order2 = orderk.copy()

    for i in range(len(order2)):
        u3 = np.vstack(
            (v1_2[:, i], v2_2[:, i])
        ).T  # stacking the combination to fit copula to
        cop, rho, aic = bestcop(copsi, u3)  # fit the best copula
        aics.append(aic)  # add AIC to aics
        rhos.append(rho)  # add parameters to rhos
        cops.append(cop)  # add copula to cops

    # add information to dataframe of tree k
    order2["rhos"] = rhos
    order2["cop"] = cops
    order2["AIC"] = aics

    # loop through remaining trees if there are more than 3 variables
    if dimen > 3:
        for k in range(3, dimen):
            order = locals()[
                "order" + str(k - 1)
            ].copy()  # select the struture of the previous tree
            v1s = locals()[
                "v1_" + str(k - 1)
            ].copy()  # select the first nodes of the previous tree
            v2s = locals()[
                "v2_" + str(k - 1)
            ].copy()  # select the second nodes of the previous tree
            # create lists for the variables
            v1_k = []
            v2_k = []
            v1 = []
            v2 = []
            ktau = []
            rhos = []
            cops = []
            node = []
            lk = []
            aics = []
            rk = []
            for i in range(len(order) - 1):
                v1i = order.v1[i].copy()  # parent node 1 from nodei in tree
                v2i = order.v2[i].copy()  # parent node 2 from nodei in tree
                copi = int(order.cop[i])  # copula of nodei
                pari = order.rhos[i]  # parameters of nodei
                for j in (
                    np.where(
                        np.array([item == v1i for item in list(order.v1[i + 1 :])])
                        | np.array([item == v1i for item in list(order.v2[i + 1 :])])
                        | np.array([item == v2i for item in list(order.v1[i + 1 :])])
                        | np.array([item == v2i for item in list(order.v2[i + 1 :])])
                    )[0]
                    + i
                    + 1
                ):  # see if possible connection between nodei and nodej
                    v1i = order.v1[i].copy()  # parent node 1 from nodei in tree
                    v2i = order.v2[i].copy()  # parent node 2 from nodei in tree
                    copj = int(order.cop[j])  # copula of nodei
                    parj = order.rhos[j]  # parameters of nodei
                    nodei = order.node[i]  # nodei
                    nodej = order.node[j]  # nodej
                    v1j = order.v1[j].copy()  # parent node 1 from nodej in tree
                    v2j = order.v2[j].copy()  # parent node 2 from nodej in tree
                    v1.append(nodei)  # new parent nodes 1
                    v2.append(nodej)  # new parent nodes 2
                    n = 2
                    ri = nodei[n + 1 :]  # select the values on the right side of node i
                    rj = nodej[n + 1 :]  # select the values on the right side of node i

                    if "g" in v1j:
                        v1j.remove("g")
                        v2j.remove("g")
                        v1i.remove("g")
                        v2i.remove("g")

                    # define left and right side of the node in tree k
                    if rj == ri:
                        if len(v1j) == 2:
                            lst = nodei[:n] + nodej[:n]
                            r3 = [max(set(lst), key=lst.count)]
                            li = [value for value in nodei[:n] if value not in r3]
                            lj = [value for value in nodej[:n] if value not in r3]
                            r = list(np.unique(ri + r3))
                        else:
                            lst = v1i[:n] + v2i[:n] + v1j[:n] + v2j[:n]
                            for s in ri:
                                lst = [x for x in lst if x != s]
                            r3 = [max(set(lst), key=lst.count)]
                            li = [value for value in nodei[:n] if value not in r3]
                            lj = [value for value in nodej[:n] if value not in r3]
                            r = list(np.unique(ri + r3))
                        l = list(np.unique(li + lj))
                    else:
                        r = list(np.unique(ri + rj))
                        li = [value for value in nodei[:n] if value not in rj]
                        lj = [value for value in nodej[:n] if value not in ri]
                        if li == lj:
                            lst = v1i[:n] + v2i[:n] + v1j[:n] + v2j[:n]
                            lst = [x for x in lst if x != li[0]]
                            l3 = [min(set(lst), key=lst.count)]
                            l = list(np.unique(li + l3))
                        else:
                            l = list(np.unique(li + lj))
                    # select the parent node values
                    ui1 = v1s[:, i]
                    ui2 = v2s[:, i]
                    uj1 = v1s[:, j]
                    uj2 = v2s[:, j]
                    if set(r).issubset(set(v1i)):
                        uni = 1
                    elif set(r).issubset(set(v2i)):
                        uni = 2
                    elif set(rj).issubset(set(v2i[1:])):
                        uni = 2
                    elif set(rj).issubset(set(v1i[1:])):
                        uni = 1
                    if set(r).issubset(set(v1j)):
                        unj = 1
                    elif set(r).issubset(set(v2j)):
                        unj = 2
                    elif set(ri).issubset(set(v2j[1:])):
                        unj = 2
                    elif set(ri).issubset(set(v1j[1:])):
                        unj = 1

                    # calculate the conditional CDF
                    v1igs = hfunc(copi, ui1, ui2, pari, un=uni)
                    v2jgs = hfunc(copj, uj1, uj2, parj, un=unj)
                    ktau.append(
                        abs(st.kendalltau(v1igs, v2jgs)[0])
                    )  # calculate the absolute kendall tau between v1igs and v2jgs, add it to the ktau list
                    # add the parent node values
                    v1_k.append(v1igs)
                    v2_k.append(v2jgs)

                    del uj1, ui1, ui2, uj2

                    node.append(l + ["g"] + r)  # set node name
                    lk.append(l)  # add left side of node
                    rk.append(r)  # add rigth side of node
            orderk = pd.DataFrame(
                {"v1": v1, "v2": v2, "tauabs": ktau, "node": node, "l": lk, "r": rk}
            )  # put information in dataframe for tree k

            if vine == "R":

                if len(orderk) > dimen - k:
                    sorder2 = order.node  # get all the unique parent nodes
                    # divide the unique nodes in Y and X
                    if (len(Y) + 1 - k) > 1:
                        Y2 = list(sorder2[: len(Y) - k].reset_index(drop=True))
                        X2 = list(sorder2[len(Y) - k :].reset_index(drop=True))
                    else:
                        Y2 = list(sorder2[:1].reset_index(drop=True))
                        X2 = list(sorder2[1:].reset_index(drop=True))
                    Y3 = Y2.copy()
                    X3 = X2.copy()

                    orderk = orderk.sort_values(by="tauabs", ascending=False)
                    indexi = list(orderk.index)
                    orderk = orderk.reset_index(drop=True)
                    inde = []
                    inde2 = []

                    if len(Y2) > 1:
                        for j in range(len(Y2) - 1):
                            for z in range(j + 1, len(Y2)):
                                if z == 1:
                                    l = (
                                        (orderk["v1"].apply(lambda x: Y2[j] == x))
                                        & (orderk["v2"].apply(lambda x: Y2[z] == x))
                                    ) | (
                                        (orderk["v1"].apply(lambda x: Y2[z] == x))
                                        & (orderk["v2"].apply(lambda x: Y2[j] == x))
                                    )
                                else:
                                    l = (
                                        l
                                        | (
                                            (orderk["v1"].apply(lambda x: Y2[j] == x))
                                            & (orderk["v2"].apply(lambda x: Y2[z] == x))
                                        )
                                        | (
                                            (orderk["v1"].apply(lambda x: Y2[z] == x))
                                            & (orderk["v2"].apply(lambda x: Y2[j] == x))
                                        )
                                    )

                        l = np.where(l == True)[0]
                        for z in l:
                            if z == l[0]:
                                order = orderk.loc[z].to_frame().T
                                inde.append(z)
                                inde2.append(indexi[z])
                            if z in inde:
                                continue
                            lst = (
                                list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v1[z])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v1[z])
                                    ]
                                )
                                + list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v2[z])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v2[z])
                                    ]
                                )
                            )
                            l1 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v1[z])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v1[z])
                                ]
                            )
                            lk = l1.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v1[z]))
                                    except:
                                        pass

                                    for s in l1:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l1 = l1 + ln
                                    lk = lk + ln

                            l2 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v2[z])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v2[z])
                                ]
                            )
                            lk = l2.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v2[z]))
                                    except:
                                        pass

                                    for s in l2:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l2 = l2 + ln
                                    lk = lk + ln
                            skip = False
                            for val in l1:
                                if val in l2:
                                    skip = True
                                    break
                            if skip == True:
                                continue
                            # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):
                            # continue
                            if len(lst) == len(set(lst)):
                                order = pd.concat(
                                    [order, orderk.loc[z].to_frame().T],
                                    ignore_index=True,
                                )
                                inde.append(z)
                                inde2.append(indexi[z])
                                break
                    else:
                        for j in Y2:
                            for z in X2:
                                if j == Y2[0] and z == X2[0]:
                                    l = (
                                        (orderk["v1"].apply(lambda x: j == x))
                                        & (orderk["v2"].apply(lambda x: z == x))
                                    ) | (
                                        (orderk["v1"].apply(lambda x: z == x))
                                        & (orderk["v2"].apply(lambda x: j == x))
                                    )
                                else:
                                    l = (
                                        l
                                        | (
                                            (orderk["v1"].apply(lambda x: j == x))
                                            & (orderk["v2"].apply(lambda x: z == x))
                                        )
                                        | (
                                            (orderk["v1"].apply(lambda x: z == x))
                                            & (orderk["v2"].apply(lambda x: j == x))
                                        )
                                    )

                        l = np.where(l == True)[0]
                        order = orderk.loc[l[0]].to_frame().T
                        inde.append(l[0])
                        inde2.append(indexi[l[0]])
                        if order.v1.iloc[-1] in X3:

                            X3.remove(order.v1.iloc[-1])
                            Y3.append(order.v1.iloc[-1])
                            # X2.remove(int(order2.v1.iloc[-1]))
                        else:
                            X3.remove(order.v2.iloc[-1])
                            Y3.append(order.v2.iloc[-1])

                    while len(order) < (dimen - k):
                        for j in Y3:
                            for z in X3:
                                if j == Y3[0] and z == X3[0]:
                                    l = (
                                        (orderk["v1"].apply(lambda x: j == x))
                                        & (orderk["v2"].apply(lambda x: z == x))
                                    ) | (
                                        (orderk["v1"].apply(lambda x: z == x))
                                        & (orderk["v2"].apply(lambda x: j == x))
                                    )
                                else:
                                    l = (
                                        l
                                        | (
                                            (orderk["v1"].apply(lambda x: j == x))
                                            & (orderk["v2"].apply(lambda x: z == x))
                                        )
                                        | (
                                            (orderk["v1"].apply(lambda x: z == x))
                                            & (orderk["v2"].apply(lambda x: j == x))
                                        )
                                    )

                        l = np.where(l == True)[0]
                        for z in l:
                            if z in inde:
                                continue
                            lst = (
                                list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v1[z])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v1[z])
                                    ]
                                )
                                + list(
                                    order.v2.astype(str)[
                                        order.v1.astype(str) == str(orderk.v2[z])
                                    ]
                                )
                                + list(
                                    order.v1.astype(str)[
                                        order.v2.astype(str) == str(orderk.v2[z])
                                    ]
                                )
                            )
                            l1 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v1[z])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v1[z])
                                ]
                            )
                            lk = l1.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v1[z]))
                                    except:
                                        pass

                                    for s in l1:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l1 = l1 + ln
                                    lk = lk + ln

                            l2 = list(
                                order.v2.astype(str)[
                                    order.v1.astype(str) == str(orderk.v2[z])
                                ]
                            ) + list(
                                order.v1.astype(str)[
                                    order.v2.astype(str) == str(orderk.v2[z])
                                ]
                            )
                            lk = l2.copy()
                            while len(lk) > 0:
                                lk2 = lk.copy()
                                lk = []
                                for j in lk2:
                                    ln = list(
                                        order.v2.astype(str)[order.v1.astype(str) == j]
                                    ) + list(
                                        order.v1.astype(str)[order.v2.astype(str) == j]
                                    )
                                    try:
                                        ln.remove(str(orderk.v2[z]))
                                    except:
                                        pass

                                    for s in l2:
                                        try:
                                            ln.remove(s)
                                        except:
                                            pass
                                    l2 = l2 + ln
                                    lk = lk + ln
                            skip = False
                            for val in l1:
                                if val in l2:
                                    skip = True
                                    break
                            if skip == True:
                                continue
                            # if (len(l1) > 1 and len(l2) >0) or (len(l1) > 0 and len(l2) >1):
                            # continue
                            # if orderk.iloc[z].v1 in X2 and orderk.iloc[z].v2 in X2:
                            # skip = False
                            # if orderk.iloc[z].v2 in Y3:

                            # numapear = sum(((order['v1'].apply(lambda x: orderk.iloc[z].v2== x )) | (order['v2'].apply(lambda x: orderk.iloc[z].v2== x))))
                            # for i in Y2:
                            #   if numapear == sum(((order['v1'].apply(lambda x: i== x )) | (order['v2'].apply(lambda x: i== x)))):
                            #     skip = True
                            #      break
                            # elif orderk.iloc[z].v1 in Y3:

                            #   numapear = sum(((order['v1'].apply(lambda x: orderk.iloc[z].v1== x )) | (order['v2'].apply(lambda x: orderk.iloc[z].v1== x))))
                            #  for i in Y2:
                            #      if numapear ==  sum(((order['v1'].apply(lambda x: i== x )) | (order['v2'].apply(lambda x: i== x)))):
                            #          skip = True
                            #          break
                            # if skip == True:
                            #    continue
                            if len(lst) == len(set(lst)):
                                order = pd.concat(
                                    [order, orderk.loc[z].to_frame().T],
                                    ignore_index=True,
                                )
                                inde.append(z)
                                inde2.append(indexi[z])
                                if order.v1.iloc[-1] in X3:

                                    X3.remove(order.v1.iloc[-1])
                                    Y3.append(order.v1.iloc[-1])
                                    # X2.remove(int(order2.v1.iloc[-1]))
                                else:
                                    X3.remove(order.v2.iloc[-1])
                                    Y3.append(order.v2.iloc[-1])

                                break

                    # orderk = orderk.sort_values(by='tauabs', ascending=False)
                    orderk = order
                    v1_k = np.array([v1_k[ind] for ind in inde2]).T
                    v2_k = np.array([v2_k[ind] for ind in inde2]).T
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1
                else:
                    orderk = orderk.sort_values(
                        by="tauabs", ascending=False
                    )  # sort this dataframe from highest to lowest tauabs
                    v1_k = np.array(
                        [v1_k[ind] for ind in orderk.index]
                    ).T  # sort array of first node
                    v2_k = np.array(
                        [v2_k[ind] for ind in orderk.index]
                    ).T  # sort array of second node
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1
                    v1_2 = v1_k.copy()
                    v2_2 = v2_k.copy()

            elif vine == "D":
                orderk = orderk.sort_values(
                    by="tauabs", ascending=False
                )  # sort this dataframe from highest to lowest tauabs
                v1_k = np.array(
                    [v1_k[ind] for ind in orderk.index]
                ).T  # sort array of first node
                v2_k = np.array(
                    [v2_k[ind] for ind in orderk.index]
                ).T  # sort array of second node
                orderk = orderk.reset_index(drop=True)
                orderk["tree"] = k - 1
                v1_2 = v1_k.copy()
                v2_2 = v2_k.copy()

            elif vine == "C":
                if len(orderk) > dimen - k:
                    subnodes = list(order.node)  # see all the unique parent nodes
                    taus = (
                        []
                    )  # list to put the sum of all tauabs in for a specific nodes
                    if (len(Y) + 1 - k) > 1:
                        for i in range(len(Y) + 1 - k):
                            orderksub = orderk[
                                (orderk["v1"].apply(lambda x: x == subnodes[i]))
                                | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                            ]
                            taus.append(
                                sum(orderksub.tauabs)
                            )  # calculate the sum of all taus for a specific variable
                        i = np.where(np.array(taus) == max(taus))[0][
                            0
                        ]  # find where the sum of the taus is the highest to find the vairable to place in the center of the first tree
                        orderk = orderk[
                            (orderk["v1"].apply(lambda x: x == subnodes[i]))
                            | (orderk["v2"].apply(lambda x: x == subnodes[i]))
                        ]  # select rows where this node is included
                    else:
                        orderk = orderk[
                            (orderk["v1"].apply(lambda x: x == subnodes[0]))
                            | (orderk["v2"].apply(lambda x: x == subnodes[0]))
                        ]

                    inde = list(orderk.index)
                    v1_k = np.array(
                        [v1_k[ind] for ind in inde]
                    ).T  # sort array of first node
                    v2_k = np.array(
                        [v2_k[ind] for ind in inde]
                    ).T  # sort array of second node
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1
                    v1_k = v1_k.copy()
                    v2_k = v2_k.copy()

                else:
                    orderk = orderk.sort_values(by="tauabs", ascending=False)
                    v1_k = np.array([v1_k[ind] for ind in orderk.index]).T
                    v2_k = np.array([v2_k[ind] for ind in orderk.index]).T
                    orderk = orderk.reset_index(drop=True)
                    orderk["tree"] = k - 1

            for j in range(len(orderk)):
                u3 = np.vstack((v1_k[:, j], v2_k[:, j])).T
                cop, rho, aic = bestcop(copsi, u3)
                aics.append(aic)
                rhos.append(rho)
                cops.append(cop)

            orderk["rhos"] = rhos
            orderk["cop"] = cops
            orderk["AIC"] = aics
            locals()["v1_" + str(k)] = v1_k
            locals()["v2_" + str(k)] = v2_k
            locals()["order" + str(k)] = orderk

    order = pd.DataFrame(columns=order1.columns)
    for i in range(1, dimen):
        order = pd.concat([order, locals()["order" + str(i)]]).reset_index(drop=True)

    a = np.empty((dimen, dimen))
    c = np.empty((dimen, dimen))
    a[:] = np.nan
    c[:] = np.nan

    order["used"] = 0
    combinations = list(product([True, False], repeat=dimen))
    for i in list(range(dimen - 1))[::-1]:
        k1 = sorted(
            np.array(
                order[(order.tree == i) & (order["used"] == 0)].node.iloc[0][:2]
            ).astype(int)
        )[::-1]
        order.loc[(order["tree"] == i) & (order["used"] == 0), "used"] = 1
        t1 = i - 1
        ii = dimen - 2 - i
        a[i : dimen - ii, ii] = k1
        s = k1[-1]
        for j in list(range(0, i))[::-1]:
            orde = order[(order.tree == j) & (order["used"] == 0)]
            for k in range(len(orde)):
                arr = np.array(orde.node.iloc[k][:2]).astype(int)
                if np.isin(s, arr) == True:
                    inde = orde.iloc[k].name
                    a[j, ii] = arr[arr != s][0]
                    order["used"][inde] = 1
                    continue

    a[0, dimen - 1] = a[0, dimen - 2]
    orderk = pd.DataFrame(columns=order.columns)
    p = np.empty((dimen, dimen))
    p[:] = np.nan
    p = p.astype(object)
    for i in list(range(dimen - 1)):
        orde = order[order.tree == i]
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            for j in range(len(orde)):
                arr = np.array(orde.node.iloc[j][:2]).astype(int)
                if sum(np.isin(akn, arr)) == 2:
                    orderj = order.loc[[orde.index[j]]]
                    p[i, k] = orderj.rhos.iloc[0]
                    c[i, k] = orderj.cop.iloc[0]
                    if i == 0:
                        orderj.node.iloc[0] = list(akn)
                    else:
                        orderj.node.iloc[0] = (
                            list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1])
                        )
                    orderk = pd.concat([orderk, orderj]).reset_index(drop=True)

    sorder = list(np.diag(a[::-1])[::-1])
    if set(sorder[-len(X) :]) != set(X):
        sorders = samplingorder(a)
        if len(Y) >= len(X):
            for sorder in sorders:
                if set(sorder[-len(X) :]) == set(X):
                    break
        else:
            for sorder in sorders:
                if set(sorder[: len(Y)]) == set(Y):
                    break
        a, p, c = samplingmatrix(a, c, p, sorder)

    if printing == True:
        for i in list(range(0, dimen - 1)):
            orde = orderk[orderk.tree == i].reset_index(drop=True)
            print("** Tree: ", i + 1)
            for j in range(len(orde)):
                if i != 0:
                    nodej = ",".join(map(str, orde.node[j])).replace(",|,", "|")
                else:
                    nodej = ",".join(map(str, orde.node[j]))
                print(
                    nodej,
                    " ---> ",
                    copulas[int(orde.cop[j])],
                    ": parameters = ",
                    orde.rhos[j],
                )

    return a, p, c


# %%



[docs]
def plotvine(a, plottitle=None, variables=None, savepath=None):
    """
    Plots the vine structure

    Arguments:
        *a* : The vine tree structure provided as a triangular matrix, composed of integers. The integer refers to different variables depending on which column the variable was in u1, where the first column is 0 and the second column is 1, etc.

        *pltotitle* : title of the plot

        *savepath* : path to save the plot

    Returns:
     *plot*



    """
    dimen = a.shape[0]  # dimension of copula
    order = pd.DataFrame(
        columns=["node", "l", "r", "tree"]
    )  # dataframe with vine copula structure.
    s = 0
    for i in list(range(dimen - 1)):
        for k in list(range(dimen - 1 - i)):
            ak = a[:, k]
            akn = np.array([ak[-1 - k], ak[i]]).astype(int)
            if i == 0:
                single_row_values = {
                    "node": list(akn),
                    "l": akn[0],
                    "r": akn[1],
                    "tree": i,
                }
            else:
                single_row_values = {
                    "node": list(akn) + ["|"] + list((ak.astype(int)[:i])[::-1]),
                    "l": list(akn),
                    "r": list((ak.astype(int)[:i])[::-1]),
                    "tree": i,
                }

            order.loc[s] = single_row_values
            s = s + 1

    for t in list(range(dimen - 1)):
        orderk = order[order.tree == t].reset_index(drop=True)
        if t == 0:
            orderk["v1"] = orderk.l
            orderk["v2"] = orderk.r
            rhos = []
            v1_1 = []
            v2_1 = []
            cops = []

            for j in range(len(orderk)):
                v1i = int(orderk.v1[j])  # first node
                v2i = int(orderk.v2[j])  # second node

            locals()["order" + str(t + 1)] = orderk
        else:

            v1k = []
            v2k = []
            for j in range(len(orderk)):
                orderk2 = order[order.tree == t - 1].reset_index(drop=True)
                l = orderk.l[j] + orderk.r[j]
                subnodes = []
                for k in range(len(orderk2)):
                    subnodes.append(sum(1 for item in orderk2.node[k] if item in l))
                subnodes = np.array(subnodes) == len(l) - 1
                orderk2 = orderk2[subnodes].reset_index(drop=True)
                v1k.append(orderk2.node[0])
                v2k.append(orderk2.node[1])
            orderk["v1"] = v1k
            orderk["v2"] = v2k
            locals()["order" + str(t + 1)] = orderk
    n = dimen - 1
    fig, axes = plt.subplots(dimen - 1, 1, figsize=(n * 2, n * 3))  # 5 rows, 1 column
    leg_labels = {}
    if variables != None:
        for i in range(len(variables)):
            leg_labels.update({i: variables[i]})

    for t, ax in zip(
        range(1, dimen), axes.flat
    ):  # Iterate through subplots and loop indices
        if t == 1:
            orderk = locals()["order" + str(t)]
            edges = list(orderk.node)
            edges = [tuple(sublist) for sublist in edges]
            edge_labels = {
                edge: ",".join(map(str, orderk.node[i])) for i, edge in enumerate(edges)
            }
        elif t == 2:
            orderk = locals()["order" + str(t)]
            edges = [
                (",".join(map(str, orderk.v1[i])), ",".join(map(str, orderk.v2[i])))
                for i in range(len(orderk))
            ]
            edge_labels = {
                edge: ",".join(map(str, orderk.node[i])).replace(",|,", "|")
                for i, edge in enumerate(edges)
            }
        else:
            orderk = locals()["order" + str(t)]
            edges = [
                (
                    ",".join(map(str, orderk.v1[i])).replace(",|,", "|"),
                    ",".join(map(str, orderk.v2[i])).replace(",|,", "|"),
                )
                for i in range(len(orderk))
            ]
            edge_labels = {
                edge: ",".join(map(str, orderk.node[i])).replace(",|,", "|")
                for i, edge in enumerate(edges)
            }

        G = nx.Graph()  # Create graph.
        G.add_edges_from(edges)  # Add edges to the graph
        pos = nx.spring_layout(G)  # Position the nodes using the spring layout

        d = nx.degree(G)
        try:
            sizes = len(edges[0][0]) * 400
        except:
            sizes = len(edges[0]) * 400

        nx.draw_networkx_labels(
            G, pos, ax=ax, bbox=dict(facecolor="skyblue"), font_size=13
        )  # Draw labels.

        nx.draw_networkx_edges(
            G,
            pos,
            edge_color="black",
            ax=ax,
        )  # Draw black edges between nodes

        nx.draw_networkx_edge_labels(
            G, pos, edge_labels=edge_labels, font_color="black", ax=ax, font_size=12
        )  # Draw text labels above each edge

        ax.axis("off")  # Remove the box around the plot

        ax.set_title(f"Tree {t}")  # Set title for the subplot

    # title
    if plottitle != None:
        fig.suptitle(plottitle, fontsize=16, y=0.93)
    if variables != None:
        plt.text(
            0.9,
            0.5,
            "\n".join([f"{key}  :  {value}" for key, value in leg_labels.items()]),
            transform=plt.gcf().transFigure,
            fontsize=15,
            verticalalignment="center",
        )
    # save
    if savepath != None:
        plt.savefig(savepath, dpi=300, bbox_inches="tight")
    plt.show()  # Show plot.