[TOC]
from tensorflow import keras
Source: https://keras.io
Transfer learning: using pretrained network models (feature extraction | fine-tuning)
These models essentially consist of two parts: a stack of convolutional layers (the convolutional base) followed by a classifier built from densely connected (Dense) layers. Reusing a pretrained model really means reusing its convolutional base and swapping in a classifier suited to your own data.
# Extract features from an arbitrary intermediate layer with VGG19
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model
import numpy as np
base_model = VGG19(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)
# The new model stops at block4_pool and outputs that layer's activations; the later VGG19 layers are simply not part of it. (Note: this does not freeze any layers; freezing is done with layer.trainable = False, as in the fine-tuning example below.)
img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
block4_pool_features = model.predict(x)
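For reference, block4_pool_features has shape (1, 14, 14, 512) for a 224x224 input. To reuse the convolutional base for classification as described above, the usual pattern is to freeze it and attach a fresh classifier. A minimal sketch follows; the 10-class Dense head and the Flatten design are assumptions, not part of the original example:

# Sketch: frozen VGG19 convolutional base plus a new, trainable classifier (10 classes assumed)
from tensorflow.keras.layers import Dense, Flatten
conv_base = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
conv_base.trainable = False  # freeze every layer of the convolutional base
x = Flatten()(conv_base.output)
outputs = Dense(10, activation='softmax')(x)  # the replacement classifier
clf_model = Model(inputs=conv_base.input, outputs=outputs)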
# Fine-tune InceptionV3 on a new set of classes
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(200, activation='softmax')(x)
# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# train the model on the new data for a few epochs
model.fit(...)
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.
# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from tensorflow.keras.optimizers import SGD
model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9), loss='categorical_crossentropy')
# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers)
model.fit(...)
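Both model.fit(...) calls above elide the training data. A minimal sketch of a suitable input pipeline, assuming images stored on disk under a hypothetical train_data/ directory with one subfolder per class:

# Sketch: tf.data pipeline for the 200-class example (the directory name is hypothetical)
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing import image_dataset_from_directory
train_ds = image_dataset_from_directory(
    'train_data/',             # hypothetical path, one subdirectory per class
    label_mode='categorical',  # one-hot labels to match categorical_crossentropy
    image_size=(299, 299),     # InceptionV3's default input size
    batch_size=32)
train_ds = train_ds.map(lambda x, y: (preprocess_input(x), y))  # scale pixels to [-1, 1]
model.fit(train_ds, epochs=5)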
Data preprocessing: image data
# Turn raw image files on disk into a tf.data.Dataset that can be used to train a model
from tensorflow import keras
from tensorflow.keras.preprocessing import image_dataset_from_directory
train_ds = image_dataset_from_directory(
    directory='training_data/',
    labels='inferred',
    label_mode='categorical',
    batch_size=32,
    image_size=(256, 256))
validation_ds = image_dataset_from_directory(
    directory='validation_data/',
    labels='inferred',
    label_mode='categorical',
    batch_size=32,
    image_size=(256, 256))
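Optionally, both datasets can be tuned for throughput before training; a small sketch using standard tf.data calls:

# Sketch: overlap input preprocessing with model execution
import tensorflow as tf
train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)
validation_ds = validation_ds.prefetch(tf.data.experimental.AUTOTUNE)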
model = keras.applications.Xception(weights=None, input_shape=(256, 256, 3), classes=10)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(train_ds, epochs=10, validation_data=validation_ds)
TensorBoard
# When training with Keras's Model.fit(), adding the tf.keras.callbacks.TensorBoard callback ensures that logs are created and stored.
# Enable per-epoch histogram computation with histogram_freq=1 (it is off by default).
# Put the logs in a timestamped subdirectory so that different training runs are easy to select.
import datetime
import numpy as np
import tensorflow as tf
log_dir = "./logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
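The fit() call below relies on x_train, y_train, x_test, y_test, and train_size, which this note never defines. A minimal sketch of suitable data, assuming the simple 1-D linear-regression setup used in the TensorBoard scalars tutorial (all names here are assumptions):

# Sketch: synthetic y = 0.5x + 2 regression data
data_size = 1000
train_size = int(data_size * 0.8)
x = np.linspace(-1, 1, data_size)
np.random.shuffle(x)
y = 0.5 * x + 2 + np.random.normal(0, 0.05, (data_size,))
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]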
model = keras.models.Sequential([
    keras.layers.Dense(16, input_dim=1),
    keras.layers.Dense(1),
])
model.compile(
    loss='mse',  # keras.losses.mean_squared_error
    optimizer=keras.optimizers.SGD(learning_rate=0.2),
)
print("Training ... With default parameters, this takes less than 10 seconds.")
training_history = model.fit(
    x_train,  # input
    y_train,  # output
    batch_size=train_size,
    verbose=0,  # suppress chatty output; use TensorBoard instead
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback],
)
print("Average test loss: ", np.average(training_history.history['loss']))
Dynamic learning rate
Logging custom values, such as a dynamic learning rate, requires the TensorFlow Summary API.
- Create a file writer with tf.summary.create_file_writer().
- Define a custom learning-rate function. This is passed to the Keras LearningRateScheduler callback.
- Inside the learning-rate function, log the current learning rate with tf.summary.scalar().
- Pass the LearningRateScheduler callback to Model.fit().
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(logdir + "/metrics")
file_writer.set_as_default()
def lr_schedule(epoch):
    """
    Returns a custom learning rate that decreases as epochs progress.
    """
    learning_rate = 0.2
    if epoch > 10:
        learning_rate = 0.02
    if epoch > 20:
        learning_rate = 0.01
    if epoch > 50:
        learning_rate = 0.005
    tf.summary.scalar('learning rate', data=learning_rate, step=epoch)
    return learning_rate
lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
model = keras.models.Sequential([
    keras.layers.Dense(16, input_dim=1),
    keras.layers.Dense(1),
])
model.compile(
    loss='mse',  # keras.losses.mean_squared_error
    optimizer=keras.optimizers.SGD(),
)
training_history = model.fit(
    x_train,  # input
    y_train,  # output
    batch_size=train_size,
    verbose=0,  # suppress chatty output; use TensorBoard instead
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback, lr_callback],
)
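The custom 'learning rate' scalar logged inside lr_schedule appears in TensorBoard's Scalars dashboard next to the loss curves, so the schedule can be verified visually.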
Hyperparameter tuning
# PS: for serious hyperparameter search, a dedicated tool such as NNI is arguably more convenient.
# Import TensorFlow and the TensorBoard HParams plugin
import datetime
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
def create_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
def train_test_model(hparams):
    '''
    The model is deliberately simple: two Dense layers with a Dropout layer in between.
    The hyperparameters are no longer hard-coded; instead they are supplied through the
    hparams dictionary and used throughout the training function.
    '''
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax),
    ])
    model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.fit(x_train, y_train, epochs=1)
    _, accuracy = model.evaluate(x_test, y_test)
    return accuracy
def run(run_dir, hparams):
    '''Log an hparams summary with the hyperparameters and final accuracy for each run.'''
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
'''Hyperparameter tuning: define the search space and the metric to report.'''
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))  # number of units in the first Dense layer
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.1, 0.2))  # dropout rate of the Dropout layer
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd']))  # optimizer
METRIC_ACCURACY = 'accuracy'
with tf.summary.create_file_writer('./logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )
session_num = 0
for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value):
        for optimizer in HP_OPTIMIZER.domain.values:
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_OPTIMIZER: optimizer,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            run('./logs/hparam_tuning/' + run_name, hparams)
            session_num += 1
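When the grid search finishes, the trials can be compared side by side in TensorBoard's HPARAMS dashboard, e.g. via tensorboard --logdir ./logs/hparam_tuning.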
Static vs. dynamic graphs
A static computation graph is built first and then run.
- Advantage: the graph structure can be optimized before execution (e.g. constant folding, operator fusion), which yields a faster forward pass.
- Drawback: variable values are only visible once the graph is actually running, as with session.run in TensorFlow 1.x.
A dynamic graph is built as it runs.
- Advantage: variable values can be inspected while the network is being built, which makes debugging easy.
- Drawback: the forward pass is hard to optimize, because the next operation is not known in advance (the sketch below contrasts the two modes).
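A minimal sketch of the difference in TensorFlow 2, where code runs eagerly (dynamically) by default and tf.function traces it into a static graph; the toy function is an assumption for illustration:

# Sketch: eager (dynamic) vs. traced (static) execution in TensorFlow 2
import tensorflow as tf

def forward(x):
    y = x * x + 1.0
    print('y =', y)  # eager mode: prints the concrete value of y
    return y

forward(tf.constant(2.0))             # dynamic: prints y = tf.Tensor(5.0, ...)
graph_forward = tf.function(forward)  # traces forward into a static graph
graph_forward(tf.constant(2.0))       # during tracing, y is a symbolic Tensor with no value yet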