-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path generator.py
More file actions
48 lines (43 loc) · 1.67 KB
/
generator.py
File metadata and controls
48 lines (43 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# generator.py
from transformers import pipeline, set_seed
def get_textgen_pipeline(model_name="gpt2"):
    """
    Build and return a Hugging Face text-generation pipeline.

    The default checkpoint is 'gpt2' because it is small; pass
    'distilgpt2' or a fine-tuned checkpoint via *model_name* instead.
    """
    task = "text-generation"
    return pipeline(task, model=model_name)
def build_conditioned_prompt(user_prompt, sentiment):
    """
    Build a prompt that steers the generator toward *sentiment*.

    Simple prompt engineering: the template explicitly asks for a
    positive / negative / neutral paragraph. Any unrecognized sentiment
    label falls back to "neutral" so the template always contains one
    of the three expected words.
    """
    allowed = {"positive", "negative", "neutral"}
    tone = sentiment if sentiment in allowed else "neutral"
    return f"Write a {tone} paragraph about: {user_prompt}\n\nParagraph:"
def generate_text(user_prompt, sentiment, max_new_tokens=120, model_name="gpt2", seed=None):
    """
    Generate a paragraph aligned to *sentiment*. Returns the generated string.

    Parameters
    ----------
    user_prompt : str
        Topic the paragraph should be about.
    sentiment : str
        "positive" / "negative" / "neutral"; any other value is treated
        as "neutral" by build_conditioned_prompt.
    max_new_tokens : int
        Upper bound on the number of *generated* tokens (prompt excluded).
    model_name : str
        Hugging Face checkpoint to load (default "gpt2").
    seed : int or None
        If given, makes sampling reproducible via transformers.set_seed.
    """
    textgen = get_textgen_pipeline(model_name=model_name)
    prompt = build_conditioned_prompt(user_prompt, sentiment)
    if seed is not None:
        set_seed(seed)
    # Fix: the previous code passed max_length=len(prompt.split()) + max_new_tokens,
    # but str.split() counts *words*, not tokens — GPT-2 BPE tokenization usually
    # yields more tokens than words, so the budget was silently wrong and output
    # could be truncated. max_new_tokens counts only newly generated tokens.
    outputs = textgen(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,       # sample for variety; adjust params for quality vs speed
        top_k=50,
        top_p=0.95,
        temperature=0.8,
        num_return_sequences=1,
        pad_token_id=textgen.tokenizer.eos_token_id,  # silence missing-pad-token warning
    )
    # outputs is a list of dicts with 'generated_text'
    text = outputs[0]["generated_text"]
    # The pipeline echoes the prompt; strip it so only the new paragraph remains.
    if text.startswith(prompt):
        text = text[len(prompt):].strip()
    return text