from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Raw comma-separated input string to be vectorized.
text = "in3x,net,watch,14zwhrd6,dildo,18"

# Tokenize with a simple split on commas.
tokens = text.split(',')

# Dummy one-document corpus: re-join the tokens with spaces so
# CountVectorizer's default word tokenizer can pick them up.
data = [' '.join(tokens)]

# Bag-of-words term counts for the corpus, then TF-IDF weighting.
# NOTE(review): with a single document every term gets the same IDF;
# presumably more documents are appended in real use — confirm.
vectorizer = CountVectorizer()
counts = vectorizer.fit_transform(data)
tfidf = TfidfTransformer().fit_transform(counts)