-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path generator.py
More file actions
48 lines (43 loc) · 1.67 KB
/
generator.py
File metadata and controls
48 lines (43 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# generator.py
from transformers import pipeline, set_seed
def get_textgen_pipeline(model_name="gpt2"):
    """
    Build and return a Hugging Face text-generation pipeline.

    The default checkpoint is 'gpt2' because it is small; pass
    'distilgpt2' or a fine-tuned checkpoint via *model_name* instead.
    """
    task = "text-generation"
    return pipeline(task, model=model_name)
def build_conditioned_prompt(user_prompt, sentiment):
    """
    Build a prompt that steers the generator toward *sentiment*.

    Simple prompt engineering: the template explicitly asks for a
    positive / negative / neutral paragraph. Any unrecognized sentiment
    label falls back to "neutral" so the template always contains one
    of the three expected words.
    """
    allowed = {"positive", "negative", "neutral"}
    tone = sentiment if sentiment in allowed else "neutral"
    return f"Write a {tone} paragraph about: {user_prompt}\n\nParagraph:"
def generate_text(user_prompt, sentiment, max_new_tokens=120, model_name="gpt2", seed=None):
    """
    Generate a paragraph aligned to *sentiment*. Returns the generated string.

    Parameters
    ----------
    user_prompt : str
        Topic the paragraph should be about.
    sentiment : str
        "positive" / "negative" / "neutral"; any other value is treated
        as "neutral" by build_conditioned_prompt.
    max_new_tokens : int
        Upper bound on the number of *generated* tokens (prompt excluded).
    model_name : str
        Hugging Face checkpoint to load (default "gpt2").
    seed : int or None
        If given, makes sampling reproducible via transformers.set_seed.
    """
    textgen = get_textgen_pipeline(model_name=model_name)
    prompt = build_conditioned_prompt(user_prompt, sentiment)
    if seed is not None:
        set_seed(seed)
    # Fix: the previous code passed max_length=len(prompt.split()) + max_new_tokens,
    # but str.split() counts *words*, not tokens — GPT-2 BPE tokenization usually
    # yields more tokens than words, so the budget was silently wrong and output
    # could be truncated. max_new_tokens counts only newly generated tokens.
    outputs = textgen(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,       # sample for variety; adjust params for quality vs speed
        top_k=50,
        top_p=0.95,
        temperature=0.8,
        num_return_sequences=1,
        pad_token_id=textgen.tokenizer.eos_token_id,  # silence missing-pad-token warning
    )
    # outputs is a list of dicts with 'generated_text'
    text = outputs[0]["generated_text"]
    # The pipeline echoes the prompt; strip it so only the new paragraph remains.
    if text.startswith(prompt):
        text = text[len(prompt):].strip()
    return text