Tokenization

Before training, the raw article text has to be converted into fixed-length integer sequences. Keras' Tokenizer builds a word index from the training texts, and pad_sequences pads or truncates every sequence to the same length.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

max_words = 1000  # Keep only the 1,000 most frequent words; adjust based on your dataset size
max_len = 200  # Maximum sequence length; adjust based on the typical length of your news articles
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_texts)
train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)
train_data = pad_sequences(train_sequences, maxlen=max_len)
test_data = pad_sequences(test_sequences, maxlen=max_len)
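
To sanity-check the preprocessing, it can help to look at the vocabulary the tokenizer learned and to decode one padded sequence back into words. The snippet below is a small sketch that assumes the tokenizer and train_data objects created above.

# Number of distinct words seen during fit_on_texts
# (only the top max_words are kept when converting texts to sequences).
print(len(tokenizer.word_index), "unique tokens found")

# Reverse the word index so integer IDs can be mapped back to words.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

# Decode the first training example; index 0 is reserved for padding and has no word.
decoded = " ".join(index_to_word.get(int(i), "?") for i in train_data[0] if i != 0)
print(decoded)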