More Data Analytics here. Below is a proof that the maximum likelihood estimator (MLE) of the variance is a biased estimator of the variance.

Hopefully I did everything right there.

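I also wrote MATLAB code that compares the sample variance, $s^2 = \frac{1}{n-1}\sum_{i=1}^{n}(x_i - \bar{x})^2$, which is shown here:

to the maximum likelihood estimator for variance, $\hat{\sigma}^2_{MLE} = \frac{1}{n}\sum_{i=1}^{n}(x_i - \bar{x})^2$.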

Here is what the code output will look like:

%% __________________________BEGIN MLE MATLAB CODE ________________________

%% ------------------------------------------------------------------------------------------------------------------

% Variance Maximum likelihood estimator tests
% @author Robert Morris - Delaware State University
% Data Analytics - Project 2
% May take 5 - 10 seconds or so to run.
%
% This project starts with a randomly generated population of weights for
% 1,000,000 individuals (numPopulation). It then calculates the mean and
% variance of the weights in the overall population.
%
% Plot 1: one sample is drawn at each of 10 doubling sample sizes
%         (100 ... 51200). The sample variance (divide by n-1) and the
%         variance MLE (divide by n) of each sample are plotted against
%         the population variance.
% Plot 2: for each of 10 doubling sample sizes (10 ... 5120), 500 trials
%         are conducted. The variance MLE is recorded for every trial, and
%         the mean, max outlier and min outlier over the trials are plotted
%         to show how the MLE converges toward the population variance as
%         the sample size grows.
% Plots 3 and 4: histograms of 1000 variance MLEs and 1000 sample
%         variances, each computed from samples of size 5000.
%
% Each graph/plot is explained; please maximize the figure after running
% for full effect.

clc; clear; close all

numPopulation = 1000000;
maxWeight = 300;
minWeight = 100;

% Randomly create the population of weights: integers drawn uniformly from
% [minWeight, maxWeight], stored as a 1-by-numPopulation row vector.
populationWeights = randi([minWeight, maxWeight], 1, numPopulation);

meanWeight = mean(populationWeights); % the population mean weight

% Population variance: sum of squared deviations from the mean, divided by
% the population size n (not n-1, because this is the whole population).
weight = sum((populationWeights - meanWeight).^2);
weightVar = weight/numPopulation; % notice original variance calculation of size n

% Experiment 1: draw ONE sample at each of `experiments` doubling sample
% sizes (starting at 100) and compute both variance estimators for it.
% Results are stored per experiment for plotting:
%   SampleVars       - sample variance, sum of squares divided by n-1
%   MLEVars          - variance MLE, sum of squares divided by n
%   actualWeightVars - population variance (constant reference line)
%   sampleXaxis      - sample size used in each experiment
experiments = 10;
sampleSize = 100;

sampleXaxis      = zeros(1, experiments);
actualWeightVars = zeros(1, experiments);
MLEVars          = zeros(1, experiments);
SampleVars       = zeros(1, experiments); % preallocate the array that is actually filled

for j = 1:experiments
    % Sample with replacement. randi(numPopulation, ...) covers every index
    % 1..numPopulation, so the first individual can be selected too.
    whichPeople = randi(numPopulation, 1, sampleSize);
    samples = populationWeights(whichPeople);

    SampleVars(1, j) = var(samples);    % sample variance calculation of size n-1
    MLEVars(1, j)    = var(samples, 1); % max likelihood calculation of size n

    actualWeightVars(1, j) = weightVar;
    sampleXaxis(1, j) = sampleSize;

    sampleSize = sampleSize * 2; % next experiment uses twice as many samples
end

% Create the results figure. The position is applied to this figure
% directly instead of changing the root default, which would not move an
% already-created figure and would leak into every later figure in the
% MATLAB session.
fig = figure('NumberTitle', 'off', ...
             'Name', 'Variance Maximum Likelihood Test', ...
             'Position', [1300 10 900 600]);
movegui(fig, 'onscreen'); % keep the window visible on smaller displays

% Subplot 1: both estimators of a single sample per sample size, plotted
% against the population variance.
subplot(2,2,1);

x = 1:numel(SampleVars);
plot(x, SampleVars, 'b', x, MLEVars, 'g', x, actualWeightVars, 'r--', 'lineWidth', 2);

% Label each tick with the sample size recorded for that experiment.
xticks(x);
xticklabels(arrayfun(@(s) sprintf('%d', s), sampleXaxis, 'UniformOutput', false));
xtickangle(45);

title('Variance Estimators Comparison');
xlabel('Variance Experiment Sample Sizes');
ylabel('Calculated Variance For Weights');

legend({'Sample Variance', 'Variance MLE', 'Population Variance'}, 'Location', 'northeast');

% Experiment 2: for each of `experiments` doubling sample sizes (starting
% at 10), run numTrials independent trials, compute the variance MLE of
% every trial, and record the max, min and mean of those MLE values.
testMLEVarianceMax  = zeros(1, experiments);
testMLEVarianceMin  = zeros(1, experiments);
testMLEVarianceMean = zeros(1, experiments);
actualWeightVars    = zeros(1, experiments);

numTrials = 500;
sampleSize = 10;

for n = 1:experiments
    % One row per trial, one column per sampled individual. Sampling is
    % with replacement over every index 1..numPopulation.
    whichPeople = randi(numPopulation, numTrials, sampleSize);
    samples = populationWeights(whichPeople);

    % Variance MLE of each trial: normalise by n (weight flag 1) along the
    % columns of each row (dimension 2); stored as a 1-by-numTrials row.
    MLEVars = var(samples, 1, 2).';

    testMLEVarianceMax(1, n)  = max(MLEVars);  % max outlier of the variance MLE
    testMLEVarianceMin(1, n)  = min(MLEVars);  % min outlier of the variance MLE
    testMLEVarianceMean(1, n) = mean(MLEVars); % the average variance MLE of the trials
    actualWeightVars(1, n)    = weightVar;

    sampleSize = sampleSize * 2; % next experiment uses twice as many samples
end

% Subplot 2: max, min and mean of the variance MLE over the trials at each
% sample size, plotted against the population variance.
subplot(2,2,2);

x = 1:10;
plot(x, testMLEVarianceMax, 'b--', x, testMLEVarianceMin, 'g--', x, testMLEVarianceMean, 'c--*', x, actualWeightVars, 'r--', 'LineWidth',2);

xticks(x);
xticklabels({'10','20','40','80','160','320','640','1280','2560', '5120'});
xtickangle(45);

title('MLE Variance Convergence');
xlabel('Variance Sample Sizes');
ylabel('Calculated Variance For Weights');

legend({'MLE Var Max', 'MLE Var Min', 'MLE Var Mean', 'Population Variance'}, 'Location', 'northeast');

% Histogram data: histoTestSize independent trials for each estimator, each
% trial using a fresh sample (drawn with replacement) from the population.
histoTestSize = 1000;

% Variance MLE distribution (sum of squares divided by n).
sampleSize = 5000;
MLEVarsHisto = zeros(1, histoTestSize);
for t = 1:histoTestSize
    % randi(numPopulation, ...) covers every index 1..numPopulation.
    samples = populationWeights(randi(numPopulation, 1, sampleSize));
    MLEVarsHisto(1, t) = var(samples, 1); % max likelihood calculation of size n
end

% Sample variance distribution (sum of squares divided by n-1), computed
% from its own independent set of samples.
sampleSize2 = 5000;
SampleVarsHisto2 = zeros(1, histoTestSize);
for t = 1:histoTestSize
    samples2 = populationWeights(randi(numPopulation, 1, sampleSize2));
    SampleVarsHisto2(1, t) = var(samples2); % sample variance calculation of size n-1
end

% Reference marker for the histograms: 80 copies of the population
% variance, drawn as a red bar on top of each distribution.
histoEffect = repmat(weightVar, 1, 80);
histoEffect2 = zeros(1,80);

bins = 50;

% One row per panel: data, face colour, title, x-axis label.
panels = { ...
    MLEVarsHisto,     'm', 'MLE Variance Distribution, 5k Sample Size Test ',    'MLE Variances '; ...
    SampleVarsHisto2, 'g', 'Sample Variance Distribution, 5k Sample Size Test ', 'Sample Variances '};

for panelIdx = 1:size(panels, 1)
    subplot(2,2,2 + panelIdx); % panels occupy the bottom row: slots 3 and 4
    hold on;

    % Estimator distribution first, then the population-variance marker,
    % so the marker is drawn on top.
    histogram(panels{panelIdx, 1}, bins, 'facecolor', panels{panelIdx, 2});
    histogram(histoEffect, bins, 'facecolor', 'r', 'BinWidth', 2);

    title(panels{panelIdx, 3});
    xlabel(panels{panelIdx, 4});
    ylabel('Totals Calculated');
    xtickangle(45);
end

%% ------------------------------------------------------------------------------------------------------------------

%% ____________________________ END MLE MATLAB CODE _______________________