Controllable generation via RL to let Elon Musk speak ill of DOGE
How to control text generation through a sentiment classifier.
!pip install pfrl@git+https://github.com/voidful/pfrl.git
!pip install textrl==0.1.6
from textrl import TextRLEnv, TextRLActor
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import logging
import sys
import pfrl
import torch
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')
Load a pre-trained model that generates tweets in elonmusk's style.
tokenizer = AutoTokenizer.from_pretrained("huggingtweets/elonmusk")
model = AutoModelForCausalLM.from_pretrained("huggingtweets/elonmusk")
model.eval()
model.cuda()
A sentiment classifier to use as the RL reward.
sentiment = pipeline('sentiment-analysis', model="cardiffnlp/twitter-roberta-base-sentiment", tokenizer="cardiffnlp/twitter-roberta-base-sentiment", device=0, return_all_scores=True)
transformers_logger = logging.getLogger('transformers')
transformers_logger.setLevel(logging.CRITICAL)
sentiment("dogecoin is bad")
sentiment("dogecoin is bad")[0][0]['score']
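With return_all_scores=True the pipeline returns a score for every class. For cardiffnlp/twitter-roberta-base-sentiment, LABEL_0 is negative, LABEL_1 neutral and LABEL_2 positive, so [0][0]['score'] is the probability of the negative class (scores elided below, structure only):
# [[{'label': 'LABEL_0', 'score': ...},   # negative
#   {'label': 'LABEL_1', 'score': ...},   # neutral
#   {'label': 'LABEL_2', 'score': ...}]]  # positive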
Set the text-generation reward: inverse perplexity + a sentiment classifier.
- Inverse perplexity keeps the probability of the generated sentence high, so the text stays fluent (a sketch of this term follows the reward class below).
- The sentiment classifier pushes the generated text to be more negative.
class MyRLEnv(TextRLEnv):
    def get_reward(self, input_item, predicted_list, finish):  # predicted_list is the list of generated tokens
        reward = 0
        if finish or len(predicted_list) >= self.env_max_length:
            if 1 < len(predicted_list):
                predicted_text = tokenizer.convert_tokens_to_string(predicted_list)
                # sentiment classifier: probability of the negative class (LABEL_0)
                reward += sentiment(input_item[0] + predicted_text)[0][0]['score']
        return reward
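The get_reward above only adds the sentiment term. Below is a minimal sketch of what the inverse-perplexity term could look like, assuming the same GPT-2 model and tokenizer loaded earlier; the helper name inverse_perplexity is hypothetical and not part of TextRL.
def inverse_perplexity(text):
    # perplexity = exp(mean token-level cross-entropy under the language model)
    ids = tokenizer(text, return_tensors='pt').input_ids.cuda()
    with torch.no_grad():
        loss = model(ids, labels=ids).loss  # mean negative log-likelihood per token
    return torch.exp(-loss).item()  # 1 / perplexity, in (0, 1]

# it could be added inside get_reward, e.g.:
# reward += inverse_perplexity(input_item[0] + predicted_text)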
Fit on one example.
observaton_list = [['i think dogecoin is']]
env = MyRLEnv(model, tokenizer, observation_input=observaton_list)
actor = TextRLActor(env,model,tokenizer)
agent = actor.agent_ppo(update_interval=10, minibatch_size=10, epochs=10)
actor.predict(observaton_list[0])
pfrl.experiments.train_agent_with_evaluation(
    agent,
    env,
    steps=100,
    eval_n_steps=None,
    eval_n_episodes=1,
    train_max_episode_len=100,
    eval_interval=10,
    outdir='elon_musk_dogecoin',
)
Load the best checkpoint saved during evaluation and predict again.
agent.load("./elon_musk_dogecoin/best")
actor.predict(observaton_list[0])
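To sanity-check the effect, the continuation can be scored with the same classifier. This sketch assumes actor.predict returns either a plain string or a token list like the one passed to get_reward:
result = actor.predict(observaton_list[0])
text = result if isinstance(result, str) else tokenizer.convert_tokens_to_string(result)
# a higher LABEL_0 (negative) score than before training means the reward shaping worked
print(sentiment(observaton_list[0][0] + text)[0][0]['score'])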