特征工程时间数据处理
police['date'] = pd.to_datetime(police['接警日期'],errors='coerce')
police['year'] =police['date'].dt.year.fillna(0).astype("int")#转化提取年
police['month'] = police['date'].dt.month.fillna(0).astype("int")#转化提取月
police['day'] = police['date'].dt.day.fillna(0).astype("int")#转化提取天
police['dates'] = police['month'].map(str) + '-' + police['day'].map(str) #转化获取月-日
police['time'] = pd.to_datetime(police['接警时间点'],errors='coerce').dt.time
police['hour'] = pd.to_datetime(police['接警时间点'],errors='coerce').dt.hour.fillna(0).astype("int")#转化提取小时
SMOTE过抽样
from imblearn.over_sampling import SMOTE
model_smote=SMOTE()
X,y=model_smote.fit_resample(X,y)
X=pd.DataFrame(X,columns=t.columns)
#分拆数据集:训练集 和 测试集
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)
print('过抽样数据特征:', X.shape,'训练数据特征:',X_train.shape,'测试数据特征:',X_test.shape)
print('过抽样后数据标签:', y.shape,'训练数据标签:',y_train.shape,'测试数据标签:',y_test.shape)
输出缺失值
print ("Train age missing value: " + str((train.Age.isnull().sum()/len(train))*100)+str("%"))
影响分析xgb输出特征重要性
model_xgb= XGBClassifier()
model_xgb.fit(X,y)
from xgboost import plot_importance
plot_importance(model_xgb,height=0.5,color='green',title='')
# plt.savefig('imp.png')
plt.show()
计算相关系数并画图
plt.style.use('classic')
plt.rcParams['font.sans-serif'] = ['SimHei']# 黑体
plt.rcParams['axes.unicode_minus'] = False# 解决无法显示符号的问题
plt.rc("figure", facecolor="white")#去除灰色边框
plt.figure(figsize=(15,6),dpi=300)
df_onehot.corr()['购买意愿'].sort_values(ascending=False).plot(kind='bar',color='dodgerblue')
plt.savefig('buyvary1.png', dpi=300)
plt.show()
data.corr(method='pearson')
data.corr(method='spearman')
data.corr(method='kendall')
Pandas处理常用操作
为dataframe添加1列
data['age']=list
合并表格再排序
data = pd.concat([with_N, without_N], axis=0)
data.sort_values(by ='目标客户编号', inplace=True)
【Python数据分析:实用向】dataframe排序
useful=useful.sort_values(by = ['购买难度'], ascending = [True])
选取指定行(以列的值筛选)
first1=data3[(data3['品牌编号']==1)]
获取列名
kf=list(data2.columns[1:7])
for x in [9,11,12,20,21,24,25,26]:
    kf.append(data2.columns[x])
print(kf)
修改列名
#1、修改列名a , b为A、B 。
df.columns = ['A','B']
#2、只修改列名a为A(注意:rename 返回新的 DataFrame,需要重新赋值或使用 inplace=True 才会生效)
df.rename(columns={'a':'A'})
删除一行(axis=0 表示按行索引删除,这里删除索引为 1 的行;若要删除一列,请使用 axis=1 并传入列名)
data3=data3.drop(1,axis=0)
列表转dataframe(嵌套列表)
from pandas.core.frame import DataFrame
data7=DataFrame(week)
data7
类型转换
Dataframe到Series
Series = Dataframe['column']
Series到list
list = Series.to_list()
list 转 array
array = np.array(list)
array 转 torch.Tensor
tensor = torch.from_numpy(array)
torch.Tensor 转 array
array = tensor.numpy()
# gpu情况下需要如下的操作
array = tensor.cpu().numpy()
torch.Tensor 转 list
# 先转numpy , 后转list
list = tensor.numpy().tolist()
array 转 list
list = array.tolist()
list 转 torch.Tensor
tensor=torch.Tensor(list)
array或者list转Series
经验总结
扩展阅读
- 给父母买什么礼物最实用
- 微波炉和烤箱哪个更实用
- python ROS2时间同步
- 图文 Python 嵌入式打包
- 教师节礼物送什么实用
- 送大学生什么礼物好实用
- 媳妇生日送什么礼物好实用的
- 其三 Gitea 1.18 功能前瞻:增强文本预览效果、继续扩展软件包注册中心、增强工单实用功能、完善了用户邀请机制和SEO
- 3 Python全栈工程师之从网页搭建入门到Flask全栈项目实战 - 入门Flask微框架
- 跟我学Python图像处理丨图像特效处理:毛玻璃、浮雕和油漆特效