Tensorflow(十四) —— 数据集加载

发布时间:2025-01-14 14:01

Tensorflow(十四) —— 数据集加载

  • 1. 主要方法
  • 2. keras.datasets
  • 3. mnist
  • 4. cifar10/100
  • 5. imdb
  • 6. tf.data.Dataset.from_tensor_slices
  • 7. db.shuffle
  • 8. db.map
  • 9. db.batch
  • 10. db.repeat
  • 11. 实例

1. 主要方法

1、keras.datasets
2、tf.data.Dataset.from_tensor_slices
shuffle
map
batch
repeat

2. keras.datasets

1、boston housing # 波士顿房价预测
2、mnist/fashion mnist
3、cifar10/cifar100
4、imdb # 用户评语

3. mnist

# ---- MNIST ----
# NOTE(review): assumes `tf` and `keras` were imported earlier in the session.
# load_data() returns the train/test splits in NumPy format.
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()

# Print the smallest and largest value found in the training inputs.
print(x.min())
print(x.max())

# One-hot encode the training labels over 10 classes, then show the
# encoded vector of the sample at index 1.
y_onehot = tf.one_hot(y, depth=10)
print(y_onehot[1])

4. cifar10/100

# ---- CIFAR-10 / CIFAR-100 ----
# Same images, different annotations: 10 coarse classes vs. 100 fine classes.
(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()

print(x.shape)
plt.imshow(x[2])
plt.show()

# Load an image file from disk and display it without axes.
apple = plt.imread("./一颗苹果.jpg")
# A bare `apple.shape` only echoes in a notebook cell; print it explicitly.
print(apple.shape)
plt.imshow(apple)
plt.axis("off")
# Render the second figure as well when run outside a notebook.
plt.show()

print(y[0])

\"Tensorflow(十四)

5. imdb

# ---- IMDB ----
# Unpack the two splits returned by load_data() in separate steps.
train_split, test_split = keras.datasets.imdb.load_data()
x, y = train_split
x_test, y_test = test_split

# Shape of the training inputs, followed by the first sample and its label.
print(x.shape)
for first_item in (x[0], y[0]):
    print(first_item)

6. tf.data.Dataset.from_tensor_slices

# ---- tf.data.Dataset.from_tensor_slices ----
(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Wrap the array in an iterable dataset and look at the first element's shape.
db1 = tf.data.Dataset.from_tensor_slices(x)
ite = iter(db1)
print(next(ite).shape)

# Inputs and labels must be passed as a tuple (x, y); a list [x, y]
# cannot be used here.
db2 = tf.data.Dataset.from_tensor_slices((x, y))
ite2 = iter(db2)
# Each element is an (input, label) pair; index 1 selects the label.
print(next(ite2)[1].shape)

7. db.shuffle

# ---- db.shuffle ----
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()

db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
# shuffle() returns a new dataset instead of modifying `db` in place, so the
# result has to be rebound; otherwise the shuffle is silently discarded.
# A larger buffer size gives a more thorough shuffle.
db = db.shuffle(10000)

8. db.map

# ---- db.map ----
# map() is used for data preprocessing: it calls the given function on
# every element of the dataset.
def preprocess(x, y):
    """Scale one sample by its own maximum and one-hot encode its label.

    Args:
        x: Input tensor; cast to float32 before scaling.
        y: Class label; cast to int32 and one-hot encoded over 10 classes.

    Returns:
        Tuple ``(x, y)`` holding the scaled input and the one-hot label.
    """
    x = tf.cast(x, dtype=tf.float32)
    y = tf.cast(y, dtype=tf.int32)

    # divide_no_nan yields 0 instead of NaN when the sample's maximum is 0
    # (e.g. an all-zero input), which plain `/` would not.
    x = tf.math.divide_no_nan(x, tf.reduce_max(x))
    y = tf.one_hot(y, depth=10)
    return x, y

# Run preprocess over every (input, label) pair of the MNIST test split.
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db1 = db.map(preprocess)

# Take the first mapped element and print the shape of its input and label.
result = next(iter(db1))
for idx in (0, 1):
    print(result[idx].shape)

9. db.batch

# ---- db.batch ----
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))

# batch(100): every element of the new dataset holds 100 samples.
db1 = db.batch(100)

# Fetch the first batch and print the shape of its inputs and labels.
res = next(iter(db1))
for idx in (0, 1):
    print(res[idx].shape)

10. db.repeat

# ---- db.repeat ----
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))

db1 = db.batch(100)

# repeat() with no argument loops forever: next() never runs out of data.
db2 = db1.repeat()

# repeat(2) yields the data twice. (reduce() is a different API that takes
# an initial state and a reduce function, so it cannot be used for this.)
db3 = db1.repeat(2)

11. 实例

# ****************** 实例
def mnist_data():
    """Build the MNIST training and test input pipelines.

    Both splits are shuffled with a buffer of 10000, preprocessed,
    grouped into batches of 100 and repeated indefinitely.

    Returns:
        Tuple ``(db_train, db_test)`` of the two resulting datasets.
    """
    def to_model_inputs(x, y):
        """Scale x by 1/255 as float32 and one-hot encode y over 10 classes."""
        scaled = tf.cast(x, dtype=tf.float32) / 255
        label = tf.cast(y, dtype=tf.int32)
        return scaled, tf.one_hot(label, depth=10)

    train_split, test_split = keras.datasets.mnist.load_data()

    # Slice and shuffle both splits first, then apply the remaining stages.
    shuffled = [
        tf.data.Dataset.from_tensor_slices(split).shuffle(10000)
        for split in (train_split, test_split)
    ]
    db_train, db_test = [
        ds.map(to_model_inputs).batch(100).repeat() for ds in shuffled
    ]
    return db_train, db_test
if __name__ == "__main__":
    # Build the pipelines when the file is executed as a script.
    mnist_data()
    

本文为参考龙龙老师的“深度学习与TensorFlow 2入门实战”课程书写的学习笔记

by CyrusMay 2022 04 16

ItVuer - 免责声明 - 关于我们 - 联系我们

本网站信息来源于互联网,如有侵权请联系:561261067@qq.com

桂ICP备16001015号