@Nikolaj-K
Created August 31, 2025 21:01
Sampler for two processes with Bernoulli trial marginals
"""
Code and prompt used in the video
https://youtu.be/xcE_0azawvM
"""
"""You'll get two tasks
First part:
Consider a joint distribution of two random variables X_k with k in {a, b} (i.e. two random variables X_a and X_b),
over finite outcome sets of sizes n_a and n_b.
Explain what the marginal distributions are.
Also explain what the correlated part \\Delta is. The value \\Delta will play a particular role.
Then, for two functions f_a and f_b (taking values in an appropriate ring structure), express Cov(f_k(X_a), f_l(X_b)) in terms of it.
I suggest you denote differences of function values, if they occur, with lowercase \\delta f_k and the appropriate indices.
Style guide:
To distinguish the two random variables, as above, use index k, taking values in {a, b}.
When the random variable's values have to be distinguished, write lower case x_{i, k}, where the index i (also j may be used) enumerates the elements of the two finite sets, respectively. Write formulas in terms of simplest factors (e.g. prefer a·(b+c) over a·b+a·c.) Likewise, factor out common factors shared by matrix entries.
Don't be afraid to explain things in words, as long as you stay formal.
Mention the requirements set on certain variables for expressions to be well-defined, and theorems to hold. (E.g., in this case, the Fréchet inequalities, when they come up.)
Make sure not to overdefine a symbol and use it twice.
Use vector and matrix notation, where possible.
Use bold letters for vectors and matrices. Use a sentence to make all dimensions of such objects explicit, for the reader.
Spell out technical requirements on all mathematical structures you encounter. This for example includes domain/codomain restrictions, positivity restrictions, etc.
The second part concerns Bernoulli-like distributions, except that they take general values.
Here is some code and then a task to compute a covariance below:
"""
import os
import numpy as np
from termcolor import colored
os.system("clear")
class Config:
    NUM_ITERS: int = 10**7  # number of samples
class JointDist:
    """
    Joint probability distribution of two dependent random variables X_a and X_b
    with Bernoulli-like marginals. Δ_psapsb encodes the correlated part of the joint success.
    """

    MARG_psa: float = 0.30
    MARG_psb: float = 0.47
    Δ_psapsb: float = 0.1  # correlated excess probability
    PAIRS_LOG = ["ss", "sf", "fs", "ff"]

    @classmethod
    def get(cls) -> list[float]:
        """Variant of __get, but with a __validate_and_log call"""
        props = cls.__get()
        cls.__validate_and_log(props)
        return props

    @classmethod
    def __get(cls) -> list[float]:
        """Compute the distribution from the parametrization via the marginals and Δ"""
        psapsb = cls.MARG_psa * cls.MARG_psb + cls.Δ_psapsb
        psaqfb = cls.MARG_psa - psapsb
        qfapsb = cls.MARG_psb - psapsb
        prob_not_qsaqsb = psapsb + psaqfb + qfapsb
        assert 0 <= prob_not_qsaqsb <= 1, "Invalid probability values"
        qsaqsb = 1 - prob_not_qsaqsb
        return [psapsb, psaqfb, qfapsb, qsaqsb]

    @classmethod
    def __validate_and_log(cls, props) -> None:
        """Check the Fréchet bounds, etc., and log all relevant probabilities."""
        psapsb, psaqfb, qfapsb, qsaqsb = props
        low = max(0, cls.MARG_psa + cls.MARG_psb - 1)
        high = min(
            cls.MARG_psa, cls.MARG_psb
        )  # Joint success of X_a and X_b can't be more probable than the smaller of the two marginal success chances
        assert low <= psapsb <= high, f"Δ_psapsb out of bounds ({cls.Δ_psapsb})"
        # fmt: off
        print(colored(f"[JointDist.get] Set probabilities\nMARG_psa, MARG_psb, Δ_psapsb:\n"
                      f"{cls.MARG_psa:.3f}, {cls.MARG_psb:.3f}, {cls.Δ_psapsb:.3f}", "magenta"))
        print(colored(f"[JointDist.get] Joint probabilities\npsapsb, psaqfb, qfapsb, qsaqsb:\n"
                      f"{psapsb:.3f}, {psaqfb:.3f}, {qfapsb:.3f}, {qsaqsb:.3f}", "magenta", attrs=["bold"]))
        # fmt: on
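# Hedged exact sanity check (no sampling; the numbers are duplicated from
# JointDist above so this sketch is self-contained): enumerating the four
# outcomes with indicator values s=1, f=0 shows, by the method of indicators,
# Cov(Xa, Xb) = psapsb - psa*psb = Δ_psapsb exactly.
_psa, _psb, _delta = 0.30, 0.47, 0.1
_psapsb = _psa * _psb + _delta
_probs = [_psapsb, _psa - _psapsb, _psb - _psapsb, 1 - _psa - _psb + _psapsb]  # ss, sf, fs, ff
_xa, _xb = [1, 1, 0, 0], [1, 0, 1, 0]  # indicator values of Xa, Xb per outcome

def _expect(vals):  # expectation under the joint table
    return sum(p * v for p, v in zip(_probs, vals))

_cov_exact = _expect([a * b for a, b in zip(_xa, _xb)]) - _expect(_xa) * _expect(_xb)
assert abs(_cov_exact - _delta) < 1e-12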
if __name__ == "__main__":
    dist = JointDist()
    probabilities = dist.get()
    psapsb_log = probabilities[0]

    sampled_indices = np.random.choice(4, size=Config.NUM_ITERS, p=probabilities)
    Xa = (sampled_indices == 0) | (sampled_indices == 1)  # success of X_a as boolean indicator
    Xb = (sampled_indices == 0) | (sampled_indices == 2)  # success of X_b as boolean indicator

    succ_rate_Xa = Xa.mean()
    succ_rate_Xb = Xb.mean()
    succ_rate_joint = np.mean(Xa & Xb)
    succ_rate_Xb_given_Xa = succ_rate_joint / succ_rate_Xa
    succ_rate_Xa_given_Xb = succ_rate_joint / succ_rate_Xb
    cov_mat = np.cov(Xa, Xb, ddof=0)
    cov_mat_ab = cov_mat[0, 1]

    # fmt: off
    NUM_SAMPLES_LOG = 27
    first_few_samples = [dist.PAIRS_LOG[idx] for idx in sampled_indices[:NUM_SAMPLES_LOG]]
    print()
    print(colored(f"First few samples (Xa, Xb), {NUM_SAMPLES_LOG} of {Config.NUM_ITERS}:\n{', '.join(first_few_samples)} ...", "white"))
    print()
    print(colored(f"{succ_rate_Xa:.6f} ... empirical P(Xa=s). \tDiff: {succ_rate_Xa - dist.MARG_psa:.6f}", "green"))
    print(colored(f"{succ_rate_Xb:.6f} ... empirical P(Xb=s). \tDiff: {succ_rate_Xb - dist.MARG_psb:.6f}", "green"))
    print(colored(f"{succ_rate_Xa * succ_rate_Xb:.6f} ... product of those empirical values (expression for P(Xa=s, Xb=s) if independent)", "yellow"))
    print(colored(f"{succ_rate_joint:.6f} ... empirical P(Xa=s, Xb=s). \tDiff: {succ_rate_joint - psapsb_log:.6f}", "green"))
    print(colored(f"{succ_rate_Xb_given_Xa:.6f} ... empirical P(Xb=s | Xa=s). \tDiff: {succ_rate_Xb_given_Xa - psapsb_log/dist.MARG_psa:.6f}", "green"))
    print(colored(f"{succ_rate_Xa_given_Xb:.6f} ... empirical P(Xa=s | Xb=s). \tDiff: {succ_rate_Xa_given_Xb - psapsb_log/dist.MARG_psb:.6f}", "green"))
    print(colored(f"{cov_mat_ab:.6f} ... empirical Cov_ab := Cov(Xa, Xb)", "cyan"))
    print(colored(f"Theorem: cov_mat_ab == dist.Δ_psapsb. \tEmpirical validation via diff: {cov_mat_ab - dist.Δ_psapsb:.6f}", "green"))
    print()
    print(colored(f"Cov_ij / Cov_aa =\n{cov_mat / cov_mat[0,0]}", "cyan"))
    # fmt: on

    def implies(P: bool, Q: bool) -> bool:
        return (not P) or Q

    Xor = (xa != xb for xa, xb in zip(Xa, Xb))
    Equiv = (xa == xb for xa, xb in zip(Xa, Xb))
    for xor, equiv in zip(Xor, Equiv):  # loop variables in the same order as the zip arguments
        assert implies(not equiv, xor)
    # The random variables Equiv and Xor are dependent.
    # This is notable insofar as something akin to Cov(not Equiv, Xor) can be zero. (see video)
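# Hedged exact sketch for the final task below (assumed indicator values
# s=1, f=0; numbers duplicated from JointDist so this is self-contained):
# for C := Xa + Xb and D := Xa - Xb, bilinearity of Cov gives
# Cov(C, D) = Var(Xa) - Var(Xb), independently of the coupling Δ_psapsb.
_pa_cd, _pb_cd, _d_cd = 0.30, 0.47, 0.1
_pss_cd = _pa_cd * _pb_cd + _d_cd
_probs_cd = [_pss_cd, _pa_cd - _pss_cd, _pb_cd - _pss_cd, 1 - _pa_cd - _pb_cd + _pss_cd]  # ss, sf, fs, ff
_xa_cd, _xb_cd = [1, 1, 0, 0], [1, 0, 1, 0]  # indicator values of Xa, Xb per outcome

def _ev_cd(vals):  # expectation under the joint table
    return sum(p * v for p, v in zip(_probs_cd, vals))

_c_vals = [a + b for a, b in zip(_xa_cd, _xb_cd)]
_d_vals = [a - b for a, b in zip(_xa_cd, _xb_cd)]
_cov_cd = _ev_cd([c * d for c, d in zip(_c_vals, _d_vals)]) - _ev_cd(_c_vals) * _ev_cd(_d_vals)
_var_a = _ev_cd([a * a for a in _xa_cd]) - _ev_cd(_xa_cd) ** 2
_var_b = _ev_cd([b * b for b in _xb_cd]) - _ev_cd(_xb_cd) ** 2
assert abs(_cov_cd - (_var_a - _var_b)) < 1e-12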
"""To fix notation, let X_k with k in {a,b} be two random variables distributed as Bernoulli process, with X_k taking values x_{p, a} with probabilities p_k, X_k taking values x_{q, k} with probabilities q_k := 1 - p_k.
Note that the finite sets are now each of cardinality 2 and we use p and q as first and second index values, for either. (If the index value is not fixed, still use i and j.)
Denote the joint distribution by upper case P_{p, q} and the marginals by lowercase letter.
Make the assumption that, in the following, all four values of P are positive (i.e. all pairs will be seen) and state this assumption to the reader.
Use the index values {p, q} for the correlated part \\Delta when addressing particular values (the index variables are still i and j for those). The matrix value \\Delta_{p,p} will play a relevant role.
For this task, still use lower case x for the values of the random variables.
Make explicit the simplifications that happen for Bernoulli case, compared to the previous task.
The next task is, for two functions f_k, to compute Cov( f_a(X_a), f_b(X_b)).
Also, apart from the Cov involving the functions, spell out the corresponding correlation (Corr), as well as the functional form of Var(X_k) (without the functions).
For the final task, assume both X_k (previously lowercase x's) take value in the same fixed set {s, f}, where s and f are distinct elements of some ring (e.g. addition and subtraction of those values is possible and s-f is non-zero.)
Hence, define C := Xa + Xb and D := Xa - Xb.
Make a table of the different return values.
Use this to validate that s+f is among the possible values of C and compute P(C=s+f | D=d).
Finally, compute Cov(C, D) and Corr(C, D). (If this is related to Var(X_k), then it should be expressed in those terms.)
Reflect on independence of C and D vs. their correlation, and on how this relates to the correlation inherent in the joint distribution of the X_k's.
Notes on your presentation:
Make references to the code, where possible - but refrain from making assumptions just because you see they were made in the code. I.e. don't make simplifications beyond what's found in the explicit instructions.
Focus on the results of the calculations - you do not have to write down intermediate steps if I did not ask for them.
Choose a canonical form to represent the final results.
Also, where possible, express quantities in terms of variances, as opposed to individual probability values.
To do all this, mention the method of indicators, where applicable.
You won't have to talk too much about expectation values here - mostly only correlations and conditional probabilities.
"""