def Combinatorial(n, i):
    '''Compute the number of combinations C(n, i)'''
    # requires n >= i
    Min = min(i, n - i)
    result = 1
    for j in range(0, Min):
        # The intermediate quotients are not always integers, so floor
        # division (//) would lose precision; use true division instead.
        result = result * (n - j) / (Min - j)
    return result
if __name__ == '__main__':
    print(int(Combinatorial(45, 2)))
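To avoid floating point altogether, the product can be built so that every division is exact: dividing by j + 1 after multiplying keeps each intermediate value an integer, since C(n, j + 1) = C(n, j) * (n - j) // (j + 1). A minimal integer-only sketch (combinatorial_exact is an illustrative name, not part of the original code):
import math

def combinatorial_exact(n, i):
    '''Integer-only C(n, i): each floor division below is exact.'''
    Min = min(i, n - i)
    result = 1
    for j in range(Min):
        result = result * (n - j) // (j + 1)
    return result

print(combinatorial_exact(45, 2))  # 990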
Use the third-party module scipy to compute numeric values of permutations and combinations
from scipy.special import comb, perm
# Number of permutations
A = perm(3, 2)
# Number of combinations
C = comb(45, 2)
print(A, C)
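By default both functions return floats. If exact integers are needed, both accept an exact=True keyword, which switches to integer arithmetic:
from scipy.special import comb, perm

# exact=True computes with integer arithmetic instead of floats.
print(perm(3, 2, exact=True))   # 6
print(comb(45, 2, exact=True))  # 990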
Compute combinations using factorials
import math

def factorial_me(n):
    '''Hand-rolled factorial function'''
    result = 1
    for i in range(2, n + 1):
        result = result * i
    return result

def comb_1(n, m):
    # Compute combinations with math.factorial
    return math.factorial(n) // (math.factorial(n - m) * math.factorial(m))

def comb_2(n, m):
    # Compute combinations with the hand-rolled factorial
    return factorial_me(n) // (factorial_me(n - m) * factorial_me(m))

def perm_1(n, m):
    # Compute permutations with math.factorial
    return math.factorial(n) // math.factorial(n - m)

def perm_2(n, m):
    # Compute permutations with the hand-rolled factorial
    return factorial_me(n) // factorial_me(n - m)

if __name__ == '__main__':
    print(factorial_me(6))
    print(comb_1(45, 2))
    print(comb_2(45, 2))
    print(perm_1(45, 2))
    print(perm_2(45, 2))
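For reference, Python 3.8 added math.comb and math.perm to the standard library, so the factorial helpers above are not strictly needed on recent versions:
import math

# Available since Python 3.8; exact integer results, no manual factorials.
print(math.comb(45, 2))  # 990
print(math.perm(45, 2))  # 1980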
Use itertools to enumerate all permutations and combinations
from itertools import combinations, permutations
# Enumerate permutations: [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]
print(list(permutations([i for i in range(1, 4)], 2)))
# Enumerate combinations: [(1, 2), (1, 3), (2, 3)]
print(list(combinations([1, 2, 3], 2)))
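As a quick sanity check, the number of enumerated combinations matches the counts computed above:
from itertools import combinations

# C(45, 2) = 990, the same value the counting functions return.
print(len(list(combinations(range(45), 2))))  # 990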
The Python gzip module provides a very simple way to compress and decompress files, and works in a manner similar to the GNU programs gzip and gunzip.
Writing a compressed file
import gzip
import io
import os

output_file_name = 'jd_example.txt.gz'
file_mode = 'wb'
with gzip.open(output_file_name, file_mode) as output:
    with io.TextIOWrapper(output, encoding='utf-8') as encode:
        encode.write('We can write anything in the file here.\n')
print(output_file_name,
      'contains', os.stat(output_file_name).st_size, 'bytes')
os.system('file -b --mime {}'.format(output_file_name))
Reading a compressed file
import gzip
import io

read_file_name = 'jd_example.txt.gz'
file_mode = 'rb'
with gzip.open(read_file_name, file_mode) as input_file:
    with io.TextIOWrapper(input_file, encoding='utf-8') as dec:
        print(dec.read())
# i = io.TextIOWrapper(gzip.open(input_gz, "rb"), encoding='utf-8')
# This returns a file-like object "i", just as the built-in open() does,
# so you can use i.readline(), iterate with "for line in i", and so on.
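Note that gzip.open also accepts text modes directly, which removes the need for an explicit TextIOWrapper; a minimal sketch reusing the same file name:
import gzip

# 'rt'/'wt' open the stream in text mode with the given encoding.
with gzip.open('jd_example.txt.gz', 'rt', encoding='utf-8') as f:
    for line in f:
        print(line, end='')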
17 Statistical Hypothesis Tests in Python
Normality Tests
This section lists statistical tests that you can use to check if your data has a Gaussian distribution.
Shapiro-Wilk Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Interpretation
H0: the sample has a Gaussian distribution.
H1: the sample does not have a Gaussian distribution.
Python Code
# Example of the Shapiro-Wilk Normality Test
from scipy.stats import shapiro
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
stat, p = shapiro(data)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')
D’Agostino’s K^2 Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Interpretation
H0: the sample has a Gaussian distribution.
H1: the sample does not have a Gaussian distribution.
Python Code
# Example of the D'Agostino's K^2 Normality Test
from scipy.stats import normaltest
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
stat, p = normaltest(data)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Gaussian')
else:
    print('Probably not Gaussian')
Anderson-Darling Test
Tests whether a data sample has a Gaussian distribution.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Interpretation
H0: the sample has a Gaussian distribution.
H1: the sample does not have a Gaussian distribution.
Python Code
# Example of the Anderson-Darling Normality Test
from scipy.stats import anderson
data = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
result = anderson(data)
print('stat=%.3f' % (result.statistic))
for i in range(len(result.critical_values)):
    sl, cv = result.significance_level[i], result.critical_values[i]
    if result.statistic < cv:
        print('Probably Gaussian at the %.1f%% level' % (sl))
    else:
        print('Probably not Gaussian at the %.1f%% level' % (sl))
Correlation Tests
This section lists statistical tests that you can use to check if two samples are related.
Pearson’s Correlation Coefficient
Tests whether two samples have a linear relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample are normally distributed.
Observations in each sample have the same variance.
Interpretation
H0: the two samples are independent.
H1: there is a dependency between the samples.
Python Code
# Example of the Pearson's Correlation test
from scipy.stats import pearsonr
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
stat, p = pearsonr(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
Spearman’s Rank Correlation
Tests whether two samples have a monotonic relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Interpretation
H0: the two samples are independent.
H1: there is a dependency between the samples.
Python Code
# Example of the Spearman's Rank Correlation Test
from scipy.stats import spearmanr
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
stat, p = spearmanr(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
Kendall’s Rank Correlation
Tests whether two samples have a monotonic relationship.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Interpretation
H0: the two samples are independent.
H1: there is a dependency between the samples.
Python Code
# Example of the Kendall's Rank Correlation Test
from scipy.stats import kendalltau
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [0.353, 3.517, 0.125, -7.545, -0.555, -1.536, 3.350, -1.578, -3.537, -1.579]
stat, p = kendalltau(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
Chi-Squared Test
Tests whether two categorical variables are related or independent.
Assumptions
Observations used in the calculation of the contingency table are independent.
25 or more examples in each cell of the contingency table.
Interpretation
H0: the two samples are independent.
H1: there is a dependency between the samples.
Python Code
# Example of the Chi-Squared Test
from scipy.stats import chi2_contingency
table = [[10, 20, 30],[6, 9, 17]]
stat, p, dof, expected = chi2_contingency(table)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably independent')
else:
    print('Probably dependent')
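Since chi2_contingency also returns the table of expected frequencies, it can be worth printing them when checking the cell-count assumption above; a small sketch reusing the same table:
from scipy.stats import chi2_contingency

table = [[10, 20, 30], [6, 9, 17]]
stat, p, dof, expected = chi2_contingency(table)
# Expected counts under the independence hypothesis; compare against
# the minimum cell-count rule of thumb stated above.
print(expected)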
Stationary Tests
This section lists statistical tests that you can use to check if a time series is stationary or not.
Augmented Dickey-Fuller Unit Root Test
Tests whether a time series has a unit root, e.g. has a trend or more generally is autoregressive.
Assumptions
Observations are temporally ordered.
Interpretation
H0: a unit root is present (series is non-stationary).
H1: a unit root is not present (series is stationary).
Python Code
# Example of the Augmented Dickey-Fuller unit root test
from statsmodels.tsa.stattools import adfuller
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
stat, p, lags, obs, crit, t = adfuller(data)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably not Stationary')
else:
    print('Probably Stationary')
Kwiatkowski-Phillips-Schmidt-Shin
Tests whether a time series is trend stationary or not.
Assumptions
Observations are temporally ordered.
Interpretation
H0: the time series is trend-stationary.
H1: the time series is not trend-stationary.
Python Code
# Example of the Kwiatkowski-Phillips-Schmidt-Shin test
from statsmodels.tsa.stattools import kpss
data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
stat, p, lags, crit = kpss(data)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably Stationary')
else:
    print('Probably not Stationary')
Parametric Statistical Hypothesis Tests
This section lists statistical tests that you can use to compare data samples.
Student’s t-test
Tests whether the means of two independent samples are significantly different.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample are normally distributed.
Observations in each sample have the same variance.
Interpretation
H0: the means of the samples are equal.
H1: the means of the samples are unequal.
Python Code
# Example of the Student's t-test
from scipy.stats import ttest_ind
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = ttest_ind(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Paired Student’s t-test
Tests whether the means of two paired samples are significantly different.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample are normally distributed.
Observations in each sample have the same variance.
Observations across each sample are paired.
Interpretation
H0: the means of the samples are equal.
H1: the means of the samples are unequal.
Python Code
# Example of the Paired Student's t-test
from scipy.stats import ttest_rel
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = ttest_rel(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Analysis of Variance Test (ANOVA)
Tests whether the means of two or more independent samples are significantly different.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample are normally distributed.
Observations in each sample have the same variance.
Interpretation
H0: the means of the samples are equal.
H1: one or more of the means of the samples are unequal.
Python Code
# Example of the Analysis of Variance Test
from scipy.stats import f_oneway
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = f_oneway(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Repeated Measures ANOVA Test
Tests whether the means of two or more paired samples are significantly different.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample are normally distributed.
Observations in each sample have the same variance.
Observations across each sample are paired.
Interpretation
H0: the means of the samples are equal.
H1: one or more of the means of the samples are unequal.
Python Code
Not supported directly in SciPy, although statsmodels provides an implementation (see the sketch below).
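A minimal sketch using statsmodels' AnovaRM; the data and column names here are illustrative, not from the original article:
# Example of a Repeated Measures ANOVA via statsmodels
import pandas as pd
from statsmodels.stats.anova import AnovaRM

# Long format: one row per (subject, condition) measurement.
df = pd.DataFrame({
    'subject':   [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'condition': ['a', 'b', 'c'] * 3,
    'value':     [0.87, 1.14, -0.21, 2.82, -0.43, 0.70, 0.12, -0.94, 0.93],
})
res = AnovaRM(df, depvar='value', subject='subject', within=['condition']).fit()
print(res)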
Nonparametric Statistical Hypothesis Tests
Mann-Whitney U Test
Tests whether the distributions of two independent samples are equal or not.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Interpretation
H0: the distributions of both samples are equal.
H1: the distributions of both samples are not equal.
Python Code
# Example of the Mann-Whitney U Test
from scipy.stats import mannwhitneyu
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = mannwhitneyu(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Wilcoxon Signed-Rank Test
Tests whether the distributions of two paired samples are equal or not.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Observations across each sample are paired.
Interpretation
H0: the distributions of both samples are equal.
H1: the distributions of both samples are not equal.
Python Code
# Example of the Wilcoxon Signed-Rank Test
from scipy.stats import wilcoxon
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = wilcoxon(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Kruskal-Wallis H Test
Tests whether the distributions of two or more independent samples are equal or not.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Interpretation
H0: the distributions of all samples are equal.
H1: the distributions of one or more samples are not equal.
Python Code
# Example of the Kruskal-Wallis H Test
from scipy.stats import kruskal
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
stat, p = kruskal(data1, data2)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
Friedman Test
Tests whether the distributions of two or more paired samples are equal or not.
Assumptions
Observations in each sample are independent and identically distributed (iid).
Observations in each sample can be ranked.
Observations across each sample are paired.
Interpretation
H0: the distributions of all samples are equal.
H1: the distributions of one or more samples are not equal.
Python Code
# Example of the Friedman Test
from scipy.stats import friedmanchisquare
data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]
data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]
data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]
stat, p = friedmanchisquare(data1, data2, data3)
print('stat=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print('Probably the same distribution')
else:
    print('Probably different distributions')
# Removing list elements with pop() inside a fixed-length loop
num_list_1 = [1, 2, 2, 2, 3]
for i in range(len(num_list_1)):
    if num_list_1[i] == 2:
        num_list_1.pop(i)
    else:
        print(num_list_1[i])
print("num_list_1:", num_list_1)
# Raises IndexError: list index out of range, because pop() shrinks the
# list while the loop still runs over the original length.
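Two common fixes, sketched below, avoid indexing a list that is being mutated: build a new list, or iterate over a copy.
num_list_1 = [1, 2, 2, 2, 3]

# Option 1: build a new list with a comprehension.
filtered = [x for x in num_list_1 if x != 2]
print("filtered:", filtered)  # [1, 3]

# Option 2: iterate over a shallow copy while mutating the original.
for x in num_list_1[:]:
    if x == 2:
        num_list_1.remove(x)
print("num_list_1:", num_list_1)  # [1, 3]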