博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Keras(十三)生成csv文件
阅读量:4202 次
发布时间:2019-05-26

本文共 5438 字,大约阅读时间需要 18 分钟。

一,准备数据

从sklearn数据集中提取数据

import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

# Print the versions of the Python libraries in use, so results can be
# reproduced against the same environment.
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

# 1. Load the California housing dataset through scikit-learn.
housing = fetch_california_housing()

# 2. Split the data into training, validation and test sets.
#    First hold out the test set, then carve the validation set out of the
#    remaining data. Fixed random_state values keep the splits reproducible.
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11)
print("x_train:", x_train.shape, y_train.shape)
print("x_valid:", x_valid.shape, y_valid.shape)
print("x_test:", x_test.shape, y_test.shape)

# 3. Standardize the features before feeding them to a model.
#    The scaler is fitted on the training set only; the validation and test
#    sets reuse the training statistics so no information leaks from them.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

二,定义保存csv文件方法

# 4. Create the "generate_csv" output folder (no error if it already exists).
output_dir = "generate_csv"
os.makedirs(output_dir, exist_ok=True)


# 5. Helper that writes an in-memory table to disk as several CSV shards.
def save_to_csv(output_dir, data, name_prefix, header=None, n_parts=10):
    """Write the rows of *data* into ``n_parts`` CSV files.

    Files are named ``<name_prefix>_<NN>.csv`` (two-digit, zero-based index)
    inside *output_dir*. Rows keep their original order and are distributed
    over the files in contiguous chunks by ``np.array_split``.

    Args:
        output_dir: Existing directory that receives the files.
        data: Row-indexable table supporting ``len()``, e.g. a 2-D NumPy
            array. Intended for numeric data.
        name_prefix: File name prefix, e.g. ``"train"``.
        header: Optional header line (without trailing newline) written at
            the top of every file.
        n_parts: Number of files to produce.

    Returns:
        The list of file paths that were written, in shard order.
    """
    path_format = os.path.join(output_dir, "{}_{:02d}.csv")
    filenames = []

    # Split the row *indices* rather than the data itself, so the helper only
    # requires len() and integer indexing from `data`.
    for file_idx, row_indices in enumerate(
            np.array_split(np.arange(len(data)), n_parts)):
        part_csv = path_format.format(name_prefix, file_idx)
        filenames.append(part_csv)
        with open(part_csv, "wt", encoding="utf-8") as f:
            if header is not None:
                f.write(header + "\n")
            for row_index in row_indices:
                # str() instead of repr(): since NumPy 2.0 the repr of a
                # scalar is e.g. "np.float64(1.5)", which would corrupt the
                # CSV. str() still yields the plain shortest round-trip text.
                f.write(",".join(str(col) for col in data[row_index]))
                f.write("\n")
    return filenames

三,将内存数据存储为csv文件

# 6. Append the target value as the last column of each feature matrix for
#    the training, validation and test sets.
#    Option 1: np.c_[] would give the same result:
#        train_data = np.c_[x_train_scaled, y_train]
#        valid_data = np.c_[x_valid_scaled, y_valid]
#        test_data = np.c_[x_test_scaled, y_test]
#    Option 2 (used here): np.column_stack().
train_data = np.column_stack((x_train_scaled, y_train))
valid_data = np.column_stack((x_valid_scaled, y_valid))
test_data = np.column_stack((x_test_scaled, y_test))

# 7. Build the CSV header: the dataset's feature names plus one extra column
#    for the target, joined into a single comma-separated string.
header_cols = housing.feature_names + ["MedianHouseValue"]
header_str = ",".join(header_cols)

# 8. Write the in-memory data out as CSV shards.
train_filenames = save_to_csv(output_dir, train_data, "train",
                              header_str, n_parts=20)
valid_filenames = save_to_csv(output_dir, valid_data, "valid",
                              header_str, n_parts=10)
test_filenames = save_to_csv(output_dir, test_data, "test",
                             header_str, n_parts=10)

四,总结代码

import os
import sys
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

# Print the versions of the Python libraries in use, so results can be
# reproduced against the same environment.
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

# 1. Load the California housing dataset through scikit-learn.
housing = fetch_california_housing()

# 2. Split the data into training, validation and test sets.
#    First hold out the test set, then carve the validation set out of the
#    remaining data. Fixed random_state values keep the splits reproducible.
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data, housing.target, random_state=7)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all, y_train_all, random_state=11)
print("x_train:", x_train.shape, y_train.shape)
print("x_valid:", x_valid.shape, y_valid.shape)
print("x_test:", x_test.shape, y_test.shape)

# 3. Standardize the features before feeding them to a model.
#    The scaler is fitted on the training set only; the validation and test
#    sets reuse the training statistics so no information leaks from them.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

# 4. Create the "generate_csv" output folder (no error if it already exists).
output_dir = "generate_csv"
os.makedirs(output_dir, exist_ok=True)


# 5. Helper that writes an in-memory table to disk as several CSV shards.
def save_to_csv(output_dir, data, name_prefix, header=None, n_parts=10):
    """Write the rows of *data* into ``n_parts`` CSV files.

    Files are named ``<name_prefix>_<NN>.csv`` (two-digit, zero-based index)
    inside *output_dir*. Rows keep their original order and are distributed
    over the files in contiguous chunks by ``np.array_split``.

    Args:
        output_dir: Existing directory that receives the files.
        data: Row-indexable table supporting ``len()``, e.g. a 2-D NumPy
            array. Intended for numeric data.
        name_prefix: File name prefix, e.g. ``"train"``.
        header: Optional header line (without trailing newline) written at
            the top of every file.
        n_parts: Number of files to produce.

    Returns:
        The list of file paths that were written, in shard order.
    """
    path_format = os.path.join(output_dir, "{}_{:02d}.csv")
    filenames = []

    # Split the row *indices* rather than the data itself, so the helper only
    # requires len() and integer indexing from `data`.
    for file_idx, row_indices in enumerate(
            np.array_split(np.arange(len(data)), n_parts)):
        part_csv = path_format.format(name_prefix, file_idx)
        filenames.append(part_csv)
        with open(part_csv, "wt", encoding="utf-8") as f:
            if header is not None:
                f.write(header + "\n")
            for row_index in row_indices:
                # str() instead of repr(): since NumPy 2.0 the repr of a
                # scalar is e.g. "np.float64(1.5)", which would corrupt the
                # CSV. str() still yields the plain shortest round-trip text.
                f.write(",".join(str(col) for col in data[row_index]))
                f.write("\n")
    return filenames


# 6. Append the target value as the last column of each feature matrix for
#    the training, validation and test sets.
#    Option 1: np.c_[] would give the same result:
#        train_data = np.c_[x_train_scaled, y_train]
#        valid_data = np.c_[x_valid_scaled, y_valid]
#        test_data = np.c_[x_test_scaled, y_test]
#    Option 2 (used here): np.column_stack().
train_data = np.column_stack((x_train_scaled, y_train))
valid_data = np.column_stack((x_valid_scaled, y_valid))
test_data = np.column_stack((x_test_scaled, y_test))

# 7. Build the CSV header: the dataset's feature names plus one extra column
#    for the target, joined into a single comma-separated string.
header_cols = housing.feature_names + ["MedianHouseValue"]
header_str = ",".join(header_cols)

# 8. Write the in-memory data out as CSV shards.
train_filenames = save_to_csv(output_dir, train_data, "train",
                              header_str, n_parts=20)
valid_filenames = save_to_csv(output_dir, valid_data, "valid",
                              header_str, n_parts=10)
test_filenames = save_to_csv(output_dir, test_data, "test",
                             header_str, n_parts=10)

欢迎关注公众号:NLP小讲堂,第一时间了解更多人工智能、算法、自然语言处理的知识和前沿技术

在这里插入图片描述

转载地址:http://dvili.baihongyu.com/

你可能感兴趣的文章
用SpringCloud Alibaba搭建属于自己的微服务(十七)~基础搭建~alibaba sentinel服务端的安装
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(十八)~基础搭建~alibaba sentinel限流
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(十九)~基础搭建~alibaba sentinel熔断
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(二十五)~基础搭建~gateway整合swagger接口文档
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(二十六)~业务开发~用户注册
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(二十七)~业务开发~jwt实现用户登录
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(二十八)~业务开发~gateway实现鉴权
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十一)~业务开发~查看商品信息接口开发
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十二)~业务开发~扣款接口开发
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十三)~业务开发~支付接口开发
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十四)~业务开发~下订单核心接口开发
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十五)~基础搭建~alibaba seata介绍
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十六)~基础搭建~alibaba seata以nacos作为注册和配置中心的服务端安装
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十七)~基础搭建~alibaba seata客户端(TM和RM)的集成
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十八)~业务开发~下订单核心接口加入seata做分布式事务
查看>>
用SpringCloud Alibaba搭建属于自己的微服务(三十九)~自动化运维~使用jenkins为微服务提供一键部署(完结篇)
查看>>
Android RecyclerView的滑动监听
查看>>
Android setSpanSizeLookup 不被调用Log 也不走
查看>>
关于android studio 出现Error:Execution failed for task ':app:preDebugAndroidTestBuild'. 的解决办法
查看>>
RxPermissions 动态权限管理
查看>>