5. Train a credit card fraud detector model

Train a credit card fraud detector with TensorFlow (Keras)

overview-4.png

import tensorflow as tf
tf.__version__
Starting Spark application
ID | YARN Application ID | Kind | State | Spark UI | Driver log
63 | application_1623853832952_0045 | pyspark | idle | Link | Link
SparkSession available as 'spark'.
'2.4.1'

Define the autoencoder

class CCFraudDetector(tf.keras.Model):
    """Dense autoencoder over fixed-width feature vectors.

    The encoder narrows the input through 16 and 8 SELU units into a 4-unit
    linear layer named "bottleneck"; the decoder widens it back through 8 and
    16 SELU units to ``input_dim`` SELU outputs.

    Args:
        input_dim: Number of features in each input vector.
    """

    def __init__(self, input_dim):
        super().__init__()
        dense = tf.keras.layers.Dense

        # Encoder layers are created (and the encoder assembled) before any
        # decoder layer so that Keras' auto-generated layer names keep the
        # same numbering.
        encoder_layers = [
            dense(16, activation='selu', input_shape=(input_dim,)),
            dense(8, activation='selu'),
            dense(4, activation='linear', name="bottleneck"),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)

        decoder_layers = [
            dense(8, activation='selu'),
            dense(16, activation='selu'),
            dense(input_dim, activation='selu'),
        ]
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Encode ``x`` and return the decoder's reconstruction of it."""
        return self.decoder(self.encoder(x))

Extend the autoencoder to return the reconstruction loss

class CCFraudDetectorModule(tf.Module):
    """Wrap a detector model and expose its per-instance reconstruction loss.

    Args:
        detector: Callable model that maps a batch of instances to a
            reconstruction of the same shape (e.g. the autoencoder).
    """

    def __init__(self, detector):
        # Initialize tf.Module first so its name and name scope are set up;
        # skipping this leaves the module only partially constructed.
        super().__init__()
        self.detector = detector

    @tf.function()
    def reconstruct(self, instances):
        """Return the mean squared reconstruction error of each instance.

        Args:
            instances: Batch of feature vectors; the error is averaged over
                axis 1, so one loss value is produced per row.

        Returns:
            A dict with the single key ``"reconstruction_loss"``.
        """
        reconstruction = self.detector(instances)
        loss = tf.reduce_mean((instances - reconstruction)**2, axis=1)
        return {"reconstruction_loss": loss}

Create experiment

def experiment_wrapper():
    """Train the fraud-detection autoencoder, export it, and report its loss.

    Reads version 1 of the "card_fraud_model" training dataset from the
    project's feature store, fits a ``CCFraudDetector`` to reconstruct its own
    input, saves it as a SavedModel whose signature returns the per-instance
    reconstruction loss, and exports it under the name "ccfraudmodel".

    Returns:
        dict: ``{'loss': <training loss of the final epoch>}``.
    """
    # Imports stay function-local, as in the original notebook cell
    # (presumably because the launcher runs this function elsewhere -- confirm).
    import os
    import uuid

    import tensorflow as tf
    from hops import model as hops_model
    from hops import tensorboard
    import hsfs

    input_dim = 9
    BATCH_SIZE = 32
    EPOCHS = 5

    # Create a connection
    connection = hsfs.connection(engine='training')
    # Get the feature store handle for the project's feature store
    fs = connection.get_feature_store()
    # Get training dataset
    td_meta = fs.get_training_dataset("card_fraud_model", 1)

    # Training data
    train_input = td_meta.tf_data(target_name=None, is_training=True)
    train_input_not_processed = train_input.tf_record_dataset()

    # Resolve the feature names once instead of on every trace of the map fn.
    feature_names = [td_feature.name for td_feature in td_meta.schema]

    def custom_impl(example):
        # An autoencoder is trained to reproduce its input, so the features
        # serve as both the input and the target.
        x = [tf.cast(example[feature_name], tf.float32) for feature_name in feature_names]
        return x, x

    # cache() comes right after the map so that the parsed records are what
    # gets cached; shuffling and repeating afterwards keeps the shuffle order
    # from being frozen into the cache and avoids caching the repeated stream.
    train_input_custom_processed = (
        train_input_not_processed.map(custom_impl)
        .cache()
        .shuffle(EPOCHS * BATCH_SIZE)
        .repeat(EPOCHS * BATCH_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    # Autoencoder
    cc_fraud_detector = CCFraudDetector(input_dim)

    # Compile the model.
    cc_fraud_detector.compile(loss=tf.keras.losses.MeanSquaredError(),
                              optimizer=tf.keras.optimizers.Adam(0.001),
                              metrics=[tf.keras.metrics.MeanSquaredError()])

    # Fit training data. TensorBoard logs and model checkpoints are both
    # written to the experiment's TensorBoard log directory.
    logdir = tensorboard.logdir()
    history = cc_fraud_detector.fit(train_input_custom_processed,
                                    verbose=0,
                                    epochs=EPOCHS,
                                    steps_per_epoch=1,
                                    validation_data=train_input_custom_processed,
                                    validation_steps=1,
                                    callbacks=[tf.keras.callbacks.TensorBoard(log_dir=logdir),
                                               tf.keras.callbacks.ModelCheckpoint(filepath=logdir)])

    # Report the loss of the last epoch: that is the state of the model being
    # exported (index 0 would be the loss after the first epoch only).
    metrics = {'loss': history.history['loss'][-1]}

    # Export model
    export_path = os.getcwd() + '/model-' + str(uuid.uuid4())
    print('Saving trained model to: {} ...'.format(export_path))

    # Module and Signature. The signature width follows input_dim so it cannot
    # drift from the width the model was built with.
    model = CCFraudDetectorModule(cc_fraud_detector)
    signatures = model.reconstruct.get_concrete_function(
        instances=tf.TensorSpec(shape=(None, input_dim), dtype=tf.float32))

    tf.saved_model.save(model, export_path, signatures=signatures)
    print('Model saved!')

    print('Exporting model ...')
    hops_model.export(export_path, "ccfraudmodel", metrics=metrics)

    print('Model exported!')
    return metrics

Launch experiment

from hops import experiment
from hops import hdfs

# Launch the training function as an experiment. 'loss' matches the key of the
# metrics dict that experiment_wrapper returns (presumably how the launcher
# picks the reported metric -- confirm against the hops documentation).
experiment.launch(
    experiment_wrapper,
    name='credit card fraud model',
    local_logdir=True,
    metric_key='loss',
)
Finished Experiment 

('hdfs://rpc.namenode.service.consul:8020/Projects/card_fraud_detection/Experiments/application_1623853832952_0045_1', {'loss': 1.74222993850708, 'log': 'Experiments/application_1623853832952_0045_1/output.log'})