Do you want to try out this notebook? Get a free account (no credit card required) at hopsworks.ai. You can also install open-source Hopsworks or view tutorial videos here.
5. Train a credit card fraud detector model
Train a credit card fraud detector with TensorFlow (Keras)
import tensorflow as tf
# Evaluate the installed TensorFlow version so the notebook cell displays it.
tf.__version__
Starting Spark application
ID | YARN Application ID | Kind | State | Spark UI | Driver log |
---|---|---|---|---|---|
63 | application_1623853832952_0045 | pyspark | idle | Link | Link |
SparkSession available as 'spark'.
'2.4.1'
Define the autoencoder
class CCFraudDetector(tf.keras.Model):
    """Dense autoencoder for the credit card fraud detector.

    The encoder narrows the input through 16 and 8 SELU units down to a
    4-unit linear layer named "bottleneck"; the decoder widens it back
    through 8 and 16 SELU units to ``input_dim`` SELU outputs.
    """

    def __init__(self, input_dim):
        """Build the encoder and decoder stacks.

        Args:
            input_dim: Number of features per input row; also the width of
                the decoder's final layer.
        """
        super().__init__()

        # Layers are created in the same order as before (encoder first,
        # then decoder) so Keras' auto-generated layer names do not change.
        encoder_layers = [
            tf.keras.layers.Dense(16, activation='selu', input_shape=(input_dim,)),
            tf.keras.layers.Dense(8, activation='selu'),
            tf.keras.layers.Dense(4, activation='linear', name="bottleneck"),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)

        decoder_layers = [
            tf.keras.layers.Dense(8, activation='selu'),
            tf.keras.layers.Dense(16, activation='selu'),
            tf.keras.layers.Dense(input_dim, activation='selu'),
        ]
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Encode ``x`` and return its decoded reconstruction."""
        return self.decoder(self.encoder(x))
Extend the autoencoder to return the reconstruction loss
class CCFraudDetectorModule(tf.Module):
    """Wrap a detector model and expose its reconstruction loss.

    The ``reconstruct`` method is a ``tf.function`` and returns the
    per-row reconstruction error of the wrapped detector.
    """

    def __init__(self, detector):
        """Store the wrapped detector.

        Args:
            detector: Callable model that maps a batch of instances to a
                reconstruction of the same shape.
        """
        # Initialise tf.Module's own state (module name / name scope); the
        # original skipped this, leaving the base class uninitialised.
        super().__init__()
        self.detector = detector

    @tf.function()
    def reconstruct(self, instances):
        """Return the mean squared reconstruction error of each row.

        Args:
            instances: Batch of input rows fed to the detector.

        Returns:
            dict: ``{"reconstruction_loss": loss}`` where ``loss`` is the
            squared difference between ``instances`` and the detector's
            output, averaged over axis 1.
        """
        reconstruction = self.detector(instances)
        loss = tf.reduce_mean((instances - reconstruction) ** 2, axis=1)
        return {"reconstruction_loss": loss}
Create experiment
def experiment_wrapper():
    """Train the fraud-detection autoencoder and export the trained model.

    Reads version 1 of the ``card_fraud_model`` training dataset from the
    project's feature store, fits a ``CCFraudDetector`` on it, saves the
    detector wrapped in a ``CCFraudDetectorModule`` with ``tf.saved_model.save``
    and exports it under the name ``ccfraudmodel``.

    Returns:
        dict: ``{'loss': <training loss of the last epoch>}``; the same
        dict is attached to the exported model as its metrics.
    """
    import os
    import uuid

    import tensorflow as tf
    from hops import tensorboard
    from hops import model as hops_model
    import hsfs

    # Create a connection
    connection = hsfs.connection(engine='training')
    # Get the feature store handle for the project's feature store
    fs = connection.get_feature_store()
    # Get training dataset
    td_meta = fs.get_training_dataset("card_fraud_model", 1)

    # NOTE(review): input_dim is presumably the number of features in the
    # training dataset schema -- confirm it matches len(td_meta.schema).
    input_dim = 9
    BATCH_SIZE = 32
    EPOCHS = 5

    # Training data
    train_input = td_meta.tf_data(target_name=None, is_training=True)
    train_input_not_processed = train_input.tf_record_dataset()

    # Resolve the feature names once instead of inside the mapped function.
    feature_names = [td_feature.name for td_feature in td_meta.schema]

    def custom_impl(example):
        """Cast every feature to float32 and return (input, target).

        The autoencoder learns to reproduce its input, so the same values
        are used as both features and labels.
        """
        x = [tf.cast(example[feature_name], tf.float32) for feature_name in feature_names]
        return x, x

    train_input_custom_processed = (
        train_input_not_processed.map(custom_impl)
        .shuffle(EPOCHS * BATCH_SIZE)
        .repeat(EPOCHS * BATCH_SIZE)
        .cache()
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    # Autoencoder
    cc_fraud_detector = CCFraudDetector(input_dim)

    # Compile the model.
    cc_fraud_detector.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(0.001),
        metrics=[tf.keras.metrics.MeanSquaredError()],
    )

    # Fit training data. Validation reuses the training pipeline, and both
    # TensorBoard logs and checkpoints go to the experiment's log directory.
    history = cc_fraud_detector.fit(
        train_input_custom_processed,
        verbose=0,
        epochs=EPOCHS,
        steps_per_epoch=1,
        validation_data=train_input_custom_processed,
        validation_steps=1,
        callbacks=[
            tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir()),
            tf.keras.callbacks.ModelCheckpoint(filepath=tensorboard.logdir()),
        ],
    )

    # Report the loss of the final epoch: that is the state of the model
    # being exported (index 0 would be the loss after the first epoch only).
    metrics = {'loss': history.history['loss'][-1]}

    # Export model
    export_path = os.path.join(os.getcwd(), 'model-' + str(uuid.uuid4()))
    print('Saving trained model to: {} ...'.format(export_path))

    # Module and Signature. The signature width follows input_dim so it
    # cannot drift from the dimension the detector was built with.
    model = CCFraudDetectorModule(cc_fraud_detector)
    signatures = model.reconstruct.get_concrete_function(
        instances=tf.TensorSpec(shape=(None, input_dim), dtype=tf.float32))
    tf.saved_model.save(model, export_path, signatures=signatures)
    print('Model saved!')

    print('Exporting model ...')
    hops_model.export(export_path, "ccfraudmodel", metrics=metrics)
    print('Model exported!')

    return metrics
Launch experiment
from hops import experiment
from hops import hdfs
# Run experiment_wrapper as an experiment named 'credit card fraud model';
# 'loss' is the key of the returned metrics dict used as the experiment metric.
experiment.launch(
    experiment_wrapper,
    name='credit card fraud model',
    local_logdir=True,
    metric_key='loss',
)
Finished Experiment
('hdfs://rpc.namenode.service.consul:8020/Projects/card_fraud_detection/Experiments/application_1623853832952_0045_1', {'loss': 1.74222993850708, 'log': 'Experiments/application_1623853832952_0045_1/output.log'})