Source code for deepr.examples.movielens.prepros.csv

# pylint: disable=no-value-for-parameter,unexpected-keyword-arg
"""CSV Preprocessing for MovieLens."""

from typing import Optional

import deepr
import tensorflow as tf

from deepr.examples.movielens.utils import fields as F


def CSVPrepro(
    vocab_size: int,
    batch_size: int = 512,
    repeat_size: Optional[int] = None,
    prefetch_size: int = 1,
    num_parallel_calls: int = 8,
    num_negatives: Optional[int] = None,
):
    """CSV Preprocessing for MovieLens.

    Assemble a ``deepr.prepros.Serial`` pipeline made of, in order:

    1. a ``Map`` of ``SequenceMask`` reading ``"inputPositives"`` and writing
       ``"inputMask"``,
    2. a ``Map`` of ``SequenceMask`` reading ``"targetPositives"`` and writing
       ``"targetMask"``,
    3. a ``PaddedBatch`` over the UID, positives, masks and one-hot fields,
    4. (only if ``num_negatives`` is not ``None``) a ``Map`` of
       ``RandomNegatives`` reading ``"targetPositives"`` and writing
       ``"targetNegatives"``,
    5. a ``Repeat`` restricted to ``deepr.TRAIN`` mode,
    6. a ``Prefetch``.

    Parameters
    ----------
    vocab_size : int
        Forwarded to the one-hot field constructors and, when negatives are
        enabled, used as the exclusive upper bound of the sampled ids.
    batch_size : int, optional
        Batch size given to ``PaddedBatch``. Defaults to 512.
    repeat_size : int, optional
        Value given to ``Repeat``. Defaults to ``None``.
    prefetch_size : int, optional
        Value given to ``Prefetch``. Defaults to 1.
    num_parallel_calls : int, optional
        Forwarded to ``Serial``. Defaults to 8.
    num_negatives : int, optional
        Number of random negatives to sample per example. When ``None``
        (the default), no negatives step is added to the pipeline.

    Returns
    -------
    deepr.prepros.Serial
        The composed preprocessing pipeline.
    """
    # Fields handed to PaddedBatch.
    fields = [
        F.UID,
        F.INPUT_POSITIVES,
        F.INPUT_MASK,
        F.TARGET_POSITIVES,
        F.TARGET_MASK,
        F.INPUT_POSITIVES_ONE_HOT(vocab_size),
        F.TARGET_POSITIVES_ONE_HOT(vocab_size),
    ]
    return deepr.prepros.Serial(
        # Masks are computed per example, before padding/batching.
        deepr.prepros.Map(SequenceMask(inputs="inputPositives", outputs="inputMask")),
        deepr.prepros.Map(SequenceMask(inputs="targetPositives", outputs="targetMask")),
        deepr.prepros.PaddedBatch(batch_size=batch_size, fields=fields),
        # Negatives are sampled after batching (the layer reads the leading
        # dimension of its input). When disabled, an empty list is passed
        # instead; NOTE(review): this presumably relies on Serial accepting
        # and flattening nested lists -- confirm against deepr.prepros.Serial.
        deepr.prepros.Map(
            RandomNegatives(
                inputs="targetPositives",
                outputs="targetNegatives",
                num_negatives=num_negatives,
                vocab_size=vocab_size,
            )
        )
        if num_negatives is not None
        else [],
        deepr.prepros.Repeat(repeat_size, modes=[deepr.TRAIN]),
        deepr.prepros.Prefetch(prefetch_size),
        num_parallel_calls=num_parallel_calls,
    )
@deepr.layers.layer(n_in=1, n_out=1)
def SequenceMask(tensors):
    """Return a boolean sequence mask sized by the element count of ``tensors``.

    The total number of elements of ``tensors`` (``tf.size``) is used as the
    single length passed to ``tf.sequence_mask``.
    """
    return tf.sequence_mask(tf.size(tensors))
@deepr.layers.layer(n_in=1, n_out=1)
def RandomNegatives(tensors, num_negatives, vocab_size):
    """Sample random ``int64`` ids with ``vocab_size`` as exclusive upper bound.

    Only the leading dimension of ``tensors`` is used; its values are ignored.
    The result has shape ``[tf.shape(tensors)[0], 1, num_negatives]``.
    """
    leading_dim = tf.shape(tensors)[0]
    sample_shape = [leading_dim, 1, num_negatives]
    return tf.random.uniform(shape=sample_shape, maxval=vocab_size, dtype=tf.int64)