Python数据分析:实用向( 三 )


series = pd.Series({'a': array})
series2 = pd.Series({'a': list})

list转dataframe
data4=DataFrame(li)
array转dataframe
df = pd.DataFrame(data=data[0:,0:],columns=['pregnants','Plasma_glucose_concentration','blood_pressure','Triceps_skin_fold_thickness','serum_insulin','BMI','Diabetes_pedigree_function','Age','Target'] )
python需要注意的地方
变量
列表的复制:直接采用a=b的方式会指向同一个内存地址
全局变量:函数内部的变量,外部是无法访问的;在函数内部用 global 声明后,函数运行过外部才可访问
循环
  • continue: 跳出本次循环
  • break: 跳出本层循环
运算
矩阵numpy乘法:
  • 点乘: np.dot(x, y)
  • 数乘: np.mat(x,int)
随机数
import random
print( random.randint(1,10) )  # 产生 1 到 10 的一个整数型随机数
print( random.random() )  # 产生 0 到 1 之间的随机浮点数
print( random.uniform(1.1,5.4) )  # 产生1.1 到 5.4 之间的随机浮点数,区间可以不是整数
print( random.choice('tomorrow') )  # 从序列中随机选取一个元素
print( random.randrange(1,100,2) )  # 生成从1到100的间隔为2的随机整数
a=[1,3,5,6,7]  # 将序列a中的元素顺序打乱
random.shuffle(a)
print(a)
import random
import string
# 随机整数:
print random.randint(1,50)
# 随机选取0到100间的偶数:
print random.randrange(0, 101, 2)
# 随机浮点数:
print random.random()
print random.uniform(1, 10)
# 随机字符:
print random.choice('abcdefghijklmnopqrstuvwxyz!@#$%^&*()')
# 多个字符中生成指定数量的随机字符:
print random.sample('zyxwvutsrqponmlkjihgfedcba',5)
# 从a-zA-Z0-9生成指定数量的随机字符:
ran_str = ''.join(random.sample(string.ascii_letters + string.digits, 8))
print ran_str
# 多个字符中选取指定数量的字符组成新字符串:
print ''.join(random.sample(['z','y','x','w','v','u','t','s','r','q','p','o','n','m','l','k','j','i','h','g','f','e','d','c','b','a'], 5))
# 随机选取字符串:
print random.choice(['剪刀', '石头', '布'])
# 打乱排序
items = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
print random.shuffle(items)
画图
画图准备
解决中文符号显示问题
plt.rcParams['font.sans-serif'] = ['SimHei']  # 黑体
plt.rcParams['axes.unicode_minus'] = False  # 解决无法显示符号的问题
sns.set(font='SimHei', font_scale=0.8)  # 解决Seaborn中文显示问题
设置背景样式
plt.style.use('classic')
plt.rc("figure", facecolor="white")  #去除灰色边框
绘图
这是一个画箱线图代码
import matplotlib.pyplot as plt
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(16,12),ncols=2)
ax1 = sns.boxplot(x="Embarked", y="Fare", hue="Pclass", data=train, ax = ax[0]);
ax2 = sns.boxplot(x="Embarked", y="Fare", hue="Pclass", data=test, ax = ax[1]);
ax1.set_title("Training Set", fontsize = 18)
ax2.set_title('Test Set',fontsize = 18)
fig.show()
画缺口饼图
churn_value=data['cvr_group_high'].value_counts()
labels=data['cvr_group_high'].value_counts().index
plt.figure(figsize=(7,7))
plt.pie(churn_value,labels=['一般客户', '高价值客户'],colors=["#75bbfd","#00ffff"], explode=(0.05,0),autopct='%1.1f%%', 
shadow=False)
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.title("高价值客户占比23.4%")
#plt.savefig('pie.png', dpi=300)
画相关性系数图
mask = np.zeros_like(data.corr(), dtype=np.bool)
#mask[np.triu_indices_from(mask)] = True
plt.subplots(figsize = (15,12))
sns.heatmap(data.corr(),
            annot=True,
            #mask = mask,
            cmap = 'RdBu', ## in order to reverse the bar replace "RdBu" with "RdBu_r"
            linewidths=.9,
            linecolor='gray',
            fmt='.2g',
            center = 0,
            square=True)
plt.title("Correlations Among Features", y = 1.03,fontsize = 20, pad = 40) #相关性矩阵
plt.savefig('cor.png', dpi=300)
plt.show()

经验总结扩展阅读