Python, MATLAB, Julia, R code: Chapter 5
Intro to Probability for Data Science
Chapters
Lecture Videos + Slides
Selected Exercise Videos
Code and Data
Problem Solutions (Instructor Only)
Purdue Courses
Acknowledgement: The Julia code is written by the contributors listed here.
Chapter 5.2 Joint expectation
Correlation Coefficients
% MATLAB code to visualize correlation coefficients x = mvnrnd([0,0],[3 1; 1 1],1000); % x = mvnrnd([0,0],[3 0; 0 3],1000); % x = mvnrnd([0,0],[3 2.9; 2.9 3],1000); s = scatter(x(:,1),x(:,2),60); s.LineWidth = 1; s.MarkerEdgeColor = [0 0.2 0.6]; s.MarkerFaceColor = [0.9 0.5 0]; axis([-5 5 -5 5]); xticks(-5:5); yticks(-5:5); set(gcf, 'Position', [100, 100, 400, 400]); set(gca,'FontWeight','bold','fontsize',14); std1 = std(x(:,1)); std2 = std(x(:,2)); m1 = mean(x(:,1)); m2 = mean(x(:,2)); Exy = mean(x(:,1).*x(:,2)); r = (Exy-m1*m2)/(std1*std2);
import numpy as np import scipy.stats as stats import matplotlib.pyplot as plt x = stats.multivariate_normal.rvs([0,0], [[3,1],[1,1]], 10000) plt.figure(); plt.scatter(x[:,0],x[:,1]) rho,_ = stats.pearsonr(x[:,0],x[:,1]) print(rho)
using Distributions using Plots x = rand(MvNormal([0, 0], [3 1; 1 1]), 1000) scatter(x[1, :], x[2, :]) sigma1 = std(x[1, :]) sigma2 = std(x[2, :]) mu1 = mean(x[1, :]) mu2 = mean(x[2, :]) Exy = mean(x[1, :] .* x[2, :]) rho = (Exy - mu1 * mu2) / (sigma1 * sigma2)
install.packages("MASS")
library(MASS)
x <- mvrnorm(mu=c(0,0),Sigma=matrix(c(3,1,1,1), 2,2),n=10000)
plot(x[,1], x[,2])
rho = cor(x[,1], x[,2], method=c('pearson'))
print(rho)
Chapter 5.6 Vector of random variables
Mean vector
% MATLAB code to compute a mean vector X = randn(100,2); mX = mean(X,2);
import numpy as np import scipy.stats as stats X = stats.multivariate_normal.rvs([0,0],[[1,0],[0,1]],100) mX = np.mean(X,axis=1)
using Statistics X = randn(100, 2) mean(X, dims=1)
x <- mvrnorm(mu=c(0,0),Sigma=matrix(c(1,0,0,1), 2,2),n=100) mX <- colMeans(x)
Covariance matrix
% MATLAB code to compute covariance matrix X = randn(100,2); covX = cov(X);
import numpy as np import scipy.stats as stats X = stats.multivariate_normal.rvs([0,0],[[1,0],[0,1]],100) covX = np.cov(X,rowvar=False) print(covX)
using Statistics X = randn(100, 2) cov(X)
x <- mvrnorm(mu=c(0,0),Sigma=matrix(c(1,0,0,1), 2,2),n=100) covX <- cov(x) print(covX)
2D Gaussian
% MATLAB code: Overlay random numbers with the Gaussian contour. X = mvnrnd([0 0],[.25 .3; .3 1],1000); x1 = -2.5:.01:2.5; x2 = -3.5:.01:3.5; [X1,X2] = meshgrid(x1,x2); F = mvnpdf([X1(:) X2(:)],[0 0],[.25 .3; .3 1]); F = reshape(F,length(x2),length(x1)); figure(1); scatter(x(:,1),x(:,2),'rx', 'LineWidth', 1.5); hold on; contour(x1,x2,F,[.001 .01 .05:.1:.95 .99 .999], 'LineWidth', 2);
import numpy as np import scipy.stats as stats import matplotlib.pyplot as plt X = stats.multivariate_normal.rvs([0,0],[[0.25,0.3],[0.3,1.0]],1000) x1 = np.arange(-2.5, 2.5, 0.01) x2 = np.arange(-3.5, 3.5, 0.01) X1, X2 = np.meshgrid(x1,x2) Xpos = np.empty(X1.shape + (2,)) Xpos[:,:,0] = X1 Xpos[:,:,1] = X2 F = stats.multivariate_normal.pdf(Xpos,[0,0],[[0.25,0.3],[0.3,1.0]]) plt.scatter(X[:,0],X[:,1]) plt.contour(x1,x2,F)
using Distributions using Plots p = MvNormal([0, 0], [0.25 0.3; 0.3 1]) X = rand(p, 1000) x1 = -2.5:0.01:2.5 x2 = -3.5:0.01:3.5 f(x1, x2) = pdf(p, [x1, x2]) scatter(X[1, :], X[2, :], legend=false) contour!(x1, x2, f, linewidth=2)
install.packages("emdbook")
library(emdbook)
library(pracma)
X <- mvrnorm(mu=c(0,0),Sigma=matrix(c(0.25,0.3,0.3,1.0), 2,2),n=1000)
x1 <- seq(-2.5, 2.49, 0.01)
x2 <- seq(-3.5, 3.49, 0.01)
X1 <- meshgrid(x1,x2)$X
X2 <- meshgrid(x1,x2)$Y
empty_dim <- c(dim(X1), 2)
Xpos <- array(numeric(), empty_dim)
Xpos[,,1] <- X1
Xpos[,,2] <- X2
F <- dmvnorm(x=cbind(c(Xpos[,,1]), c(Xpos[,,2]))[1:250000,]
,mu=c(0,0)
,Sigma=matrix(c(0.25,0.3,0.3,1.0), 2,2))
plot(x[,1], x[,2])
contour(x1,x2,matrix(F, 500,500))
Chapter 5.7 Transformation of Multi-dimensional Gaussian
% MATLAB code: Gaussian(0,1) --> Gaussian(mu,sigma) x = mvnrnd([0,0],[1 0; 0 1],1000); Sigma = [3 -0.5; -0.5 1]; mu = [1; -2]; y = Sigma^(0.5)*x' + mu;
import numpy as np import scipy.stats as stats from scipy.linalg import fractional_matrix_power x = np.random.multivariate_normal([0,0],[[1,0],[0,1]],1000) mu = np.array([1,-2]) Sigma = np.array([[3, -0.5],[-0.5, 1]]) Sigma2 = fractional_matrix_power(Sigma,0.5) y = np.dot(Sigma2, x.T) + np.matlib.repmat(mu,1000,1).T
using Distributions x = rand(MvNormal([0, 0], [1 0; 0 1]), 1000) Sigma = [3 -0.5; -0.5 1] mu = [1, -2] y = Sigma^(1/2) * x .+ mu
install.packages("powerplus")
library("powerplus")
x <- mvrnorm(mu=c(0,0),Sigma=matrix(c(1,0,0,1), 2,2),n=1000)
mu = c(1,-2)
Sigma = matrix(c(3,-0.5,-0.5,1), 2,2)
Sigma2 = Matpow(covY,0.5)
y = Sigma2 %*% t(x) + t(repmat(mu, 1000, 1))
% MATLAB code: Gaussian(mu,sigma) --> Gaussian(0,1) y = mvnrnd([1; -2],[3 -0.5; -0.5 1],100); mY = mean(y); covY = cov(y); x = covY^(-0.5)*(y-mY)';
import numpy as np import scipy.stats as stats from scipy.linalg import fractional_matrix_power y = np.random.multivariate_normal([1,-2],[[3,-0.5],[-0.5,1]],100) mY = np.mean(y,axis=0) covY = np.cov(y,rowvar=False) covY2 = fractional_matrix_power(covY,-0.5) x = np.dot(covY2, (y-np.matlib.repmat(mY,100,1)).T)
using Distributions y = rand(MvNormal([1, -2], [3 -0.5; -0.5 1]), 100) mu = mean(y, dims=2) Sigma = cov(y') x = Sigma^(-1/2) * (y .- mu)
install.packages("powerplus")
library("powerplus")
y <- mvrnorm(mu=c(1,-2),Sigma=matrix(c(3,-0.5,-0.5,1), 2,2),n=100)
mY = colMeans(y)
covY <- cov(y)
covY2 <- Matpow(covY,-0.5)
x = covY2 %*% t(y-repmat(mY,100,1))
Chapter 5.8 Principal component analysis
% MATLAB code to perform the principal component analysis x = mvnrnd([0,0],[2 -1.9; -1.9 2],1000); covX = cov(x); [U,S] = eig(covX); u(:,1) % Principle components u(:,2) % Principle components
import numpy as np x = np.random.multivariate_normal([1,-2],[[3,-0.5],[-0.5,1]],1000) covX = np.cov(x,rowvar=False) S, U = np.linalg.eig(covX) print(U)
using LinearAlgebra using Distributions x = rand(MvNormal([0, 0], [2 -1.9; -1.9 2]), 1000) Sigma = cov(x') S, U = eigen(Sigma) U[:, 1] U[:, 2]
x <- mvrnorm(mu=c(1,-2),Sigma=matrix(c(3,-0.5,-0.5,1), 2,2),n=1000) covX <- cov(x) U <- eigen(covX)$vectors print(U)





