// SentimentCalculationEngine.java

package com.kapil.verbametrics.services.engines;

import com.kapil.verbametrics.config.SentimentAnalysisProperties;
import com.kapil.verbametrics.config.SentimentRuleProperties;
import com.kapil.verbametrics.services.WordListService;
import com.kapil.verbametrics.util.VerbaMetricsConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.Arrays;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Business logic engine for sentiment score calculation.
 * <p>
 * The engine normalizes the input, splits it into sentences and tokens, sums a signed
 * contribution for every token found in the positive/negative word lists (taking negations,
 * boosters, dampeners and contrastives from {@link SentimentRuleProperties} into account),
 * adds fixed phrase adjustments, and finally squashes the sum into a bounded score.
 *
 * @author Kapil Garg
 */
@Component
public class SentimentCalculationEngine {

    private static final Logger LOGGER = LoggerFactory.getLogger(SentimentCalculationEngine.class);

    /**
     * Matches the whitespace run that follows a sentence terminator ({@code .}, {@code !} or {@code ?}).
     * Compiled once because it is used for every analyzed text.
     */
    private static final Pattern SENTENCE_BOUNDARY = Pattern.compile("(?<=[.!?])\\s+");

    /**
     * Number of preceding tokens inspected for boosters and dampeners.
     */
    private static final int MODIFIER_LOOKBACK = 2;

    /**
     * Extra weight for sentiment words that follow a contrastive.
     * Reduced from 1.5 to avoid overconfidence in mixed sentiment texts.
     */
    private static final double CONTRASTIVE_WEIGHT = 1.2;

    private final WordListService wordListService;
    private final SentimentRuleProperties ruleProperties;
    private final SentimentAnalysisProperties analysisProperties;

    /**
     * Creates the engine with its collaborators.
     *
     * @param wordListService    provider of the positive and negative word sets
     * @param analysisProperties text-processing configuration (case sensitivity, hyphens, word separator)
     * @param ruleProperties     sentiment rule configuration (negations, boosters, dampeners, phrases, ...)
     */
    @Autowired
    public SentimentCalculationEngine(WordListService wordListService, SentimentAnalysisProperties analysisProperties, SentimentRuleProperties ruleProperties) {
        this.wordListService = wordListService;
        this.analysisProperties = analysisProperties;
        this.ruleProperties = ruleProperties;
    }

    /**
     * Calculates the sentiment score based on positive and negative word sets.
     * <p>
     * The raw weighted sum {@code s} is mapped to {@code s / sqrt(s^2 + alpha)}, where
     * {@code alpha} is the configured normalization alpha. If the denominator is not a
     * positive number the score is {@code 0.0}.
     *
     * @param text the text to analyze; {@code null} or blank text yields {@code 0.0}
     * @return the sentiment score between -1.0 and 1.0
     */
    public double calculateSentimentScore(String text) {
        if (text == null || text.isBlank()) {
            return 0.0;
        }
        String normalized = getNormalizedString(text);
        boolean hasWords = Arrays.stream(tokenizeFromNormalized(normalized))
                .anyMatch(token -> !token.isBlank());
        if (!hasWords) {
            return 0.0;
        }
        // Phrases are matched against the raw text, not the normalized one.
        double phraseAdjustment = calculatePhraseAdjustments(text);
        double weightedSum = calculateWeightedSentimentNormalized(normalized) + phraseAdjustment;
        double denominator = Math.sqrt(weightedSum * weightedSum + ruleProperties.getNormalizationAlpha());
        return denominator > 0 ? weightedSum / denominator : 0.0;
    }

    /**
     * Splits already-normalized text into tokens based on the configured word separator
     * and restores the apostrophes that normalization replaced with a placeholder.
     *
     * @param normalized already-normalized text
     * @return an array of tokens; empty when the text is {@code null} or blank
     */
    private String[] tokenizeFromNormalized(String normalized) {
        if (normalized == null || normalized.isBlank()) {
            return new String[0];
        }
        String[] tokens = normalized.split(analysisProperties.getTextProcessing().getWordSeparator());
        for (int i = 0; i < tokens.length; i++) {
            tokens[i] = tokens[i].replace(VerbaMetricsConstants.APOSTROPHE_PLACEHOLDER, "'");
        }
        return tokens;
    }

    /**
     * Normalizes the input text based on case sensitivity and preprocessing rules.
     * <p>
     * Note: Hyphen normalization replaces all hyphens with spaces, which may break
     * hyphenated compound words (e.g., 'state-of-the-art' becomes 'state of the art').
     * This behavior can be disabled via the normalizeHyphens configuration property.
     * <p>
     * Apostrophes are swapped for {@link VerbaMetricsConstants#APOSTROPHE_PLACEHOLDER} so they
     * survive tokenization; {@link #tokenizeFromNormalized(String)} swaps them back.
     *
     * @param text the text to normalize
     * @return the normalized string
     */
    private String getNormalizedString(String text) {
        boolean caseSensitive = analysisProperties.getTextProcessing().isCaseSensitive();
        boolean normalizeHyphens = analysisProperties.getTextProcessing().isNormalizeHyphens();
        String preprocessed = text;
        if (normalizeHyphens) {
            preprocessed = preprocessed.replace('-', ' ');
        }
        preprocessed = preprocessed.trim();
        if (!caseSensitive) {
            // Locale.ROOT keeps the folding independent of the JVM default locale
            // (e.g. a Turkish locale would turn "I" into a dotless "ı").
            preprocessed = preprocessed.toLowerCase(Locale.ROOT);
        }
        // The placeholder is inserted AFTER case folding so that its own characters are never
        // lower-cased; otherwise the tokenizer could not find it again to restore the apostrophe.
        // String.replace is used because the substitution is literal, not a regex.
        return preprocessed.replace("'", VerbaMetricsConstants.APOSTROPHE_PLACEHOLDER);
    }

    /**
     * Calculates a weighted sentiment sum from normalized text.
     * Processes text sentence by sentence, with a fresh context per sentence, so that
     * negation and contrastive state never leaks across sentence boundaries.
     *
     * @param normalizedText the normalized input text
     * @return the weighted sentiment sum
     */
    private double calculateWeightedSentimentNormalized(String normalizedText) {
        var positiveWords = wordListService.getPositiveWords();
        var negativeWords = wordListService.getNegativeWords();
        double sum = 0.0;
        for (String sentence : splitIntoSentences(normalizedText)) {
            if (sentence == null || sentence.isBlank()) {
                continue;
            }
            String[] sentenceTokens = tokenizeFromNormalized(sentence);
            SentimentContext context = new SentimentContext();
            for (int i = 0; i < sentenceTokens.length; i++) {
                String token = sentenceTokens[i];
                if (token.isBlank()) {
                    continue;
                }
                if (handleSpecialTokens(token, i, sentenceTokens, context)) {
                    continue;
                }
                sum += processSentimentToken(token, i, sentenceTokens, positiveWords, negativeWords, context);
            }
        }
        return sum;
    }

    /**
     * Splits text into sentences based on sentence-ending punctuation.
     * A boundary is a run of whitespace that directly follows a period, exclamation mark,
     * or question mark; the punctuation stays attached to the preceding sentence.
     *
     * @param text the text to split
     * @return array of non-blank sentences; empty when the text is {@code null} or blank
     */
    private String[] splitIntoSentences(String text) {
        if (text == null || text.isBlank()) {
            return new String[0];
        }
        return Arrays.stream(SENTENCE_BOUNDARY.split(text))
                .filter(s -> !s.trim().isBlank())
                .toArray(String[]::new);
    }

    /**
     * Handles special tokens like punctuation, contrastives, and negations.
     * <p>
     * Punctuation breaks reset the context and contrastives arm the contrastive window; both
     * are consumed. Any other token only updates the negation state and is passed on.
     *
     * @param token   current token
     * @param index   current index
     * @param tokens  all tokens
     * @param context sentiment context
     * @return true if token was handled and should be skipped
     */
    private boolean handleSpecialTokens(String token, int index, String[] tokens, SentimentContext context) {
        if (ruleProperties.getPunctuationBreaks().contains(token)) {
            resetContext(context);
            return true;
        }
        if (ruleProperties.getContrastives().contains(token)) {
            context.afterContrastive = true;
            context.contrastiveCountdown = ruleProperties.getContrastiveWindow();
            return true;
        }
        updateNegationState(token, index, tokens, context);
        return false;
    }

    /**
     * Processes a sentiment-bearing token and returns its contribution to the total score.
     *
     * @param token         current token
     * @param index         current index
     * @param tokens        all tokens
     * @param positiveWords set of positive words
     * @param negativeWords set of negative words
     * @param context       sentiment context
     * @return contribution to sentiment score; {@code 0.0} when the token is in neither word set
     */
    private double processSentimentToken(String token, int index, String[] tokens, Set<String> positiveWords, Set<String> negativeWords,
                                         SentimentContext context) {
        boolean isPositive = positiveWords.contains(token);
        boolean isNegative = negativeWords.contains(token);
        if (!isPositive && !isNegative) {
            return 0.0;
        }
        double valence = calculateValence(token, isPositive, isNegative, context);
        double modifier = calculateModifier(tokens, index, context);
        // The negation window is consumed only by sentiment-bearing tokens.
        updateNegationWindow(context);
        return valence * modifier;
    }

    /**
     * Resets the sentiment context.
     *
     * @param context the sentiment context
     */
    private void resetContext(SentimentContext context) {
        context.negationActive = false;
        context.negationWindow = 0;
        context.negationSkipIndex = -1;
        context.afterContrastive = false;
        context.contrastiveCountdown = 0;
    }

    /**
     * Updates negation state based on current token.
     * <p>
     * "not only" and "not without" are treated as units that do not negate: the pair clears any
     * active negation, and the second word is skipped when it is reached so that it cannot
     * re-activate negation on its own (which would happen if it is a configured negation).
     *
     * @param token   the current token
     * @param index   the current index
     * @param tokens  the tokenized input text
     * @param context the sentiment context
     */
    private void updateNegationState(String token, int index, String[] tokens, SentimentContext context) {
        if (index == context.negationSkipIndex) {
            // Second half of a "not only" / "not without" pair that was already handled.
            context.negationSkipIndex = -1;
            return;
        }
        if ("not".equals(token) && index + 1 < tokens.length) {
            String next = tokens[index + 1];
            // "not only" doesn't create negation; "not without" is a litotes
            // (double negative = positive), so negation is cancelled in both cases.
            if ("only".equals(next) || "without".equals(next)) {
                context.negationActive = false;
                context.negationWindow = 0;
                context.negationSkipIndex = index + 1;
                return;
            }
        }
        if (ruleProperties.getNegations().contains(token)) {
            // Handle double negations properly - if already negated, another negation cancels it
            if (context.negationActive) {
                context.negationActive = false;
                context.negationWindow = 0;
            } else {
                context.negationActive = true;
                context.negationWindow = ruleProperties.getNegationWindow();
            }
        }
    }

    /**
     * Calculates valence (positive/negative) with negation handling.
     *
     * @param token      the current token being processed
     * @param isPositive true if the token is positive
     * @param isNegative true if the token is negative
     * @param context    the sentiment context
     * @return {@code 1.0} or {@code -1.0} (sign flipped while negation is active), or
     *         {@code 0.0} when the token appears in both word lists
     */
    private double calculateValence(String token, boolean isPositive, boolean isNegative, SentimentContext context) {
        if (isPositive && isNegative) {
            // A word can't be both positive and negative - this indicates data quality issues
            LOGGER.warn("Word '{}' found in both positive and negative word lists - this may indicate data quality issues in the sentiment lexicons", token);
            return 0.0;
        }
        double valence = isPositive ? 1.0 : -1.0;
        return context.negationActive ? -valence : valence;
    }

    /**
     * Calculates modifier based on boosters, dampeners, and contrastive weighting.
     * <p>
     * Starts at {@code 1.0}, adds the magnitude of every booster and subtracts the magnitude of
     * every dampener found in the preceding tokens, clamps the result at {@code 0.0}, and
     * multiplies it by the contrastive weight while a contrastive is in effect. Each call made
     * while a contrastive is in effect also consumes one step of the contrastive countdown.
     *
     * @param tokens  the tokenized input text
     * @param index   the current index
     * @param context the sentiment context
     * @return the modifier
     */
    private double calculateModifier(String[] tokens, int index, SentimentContext context) {
        double modifier = 1.0;
        for (int j = Math.max(0, index - MODIFIER_LOOKBACK); j < index; j++) {
            String prev = tokens[j];
            if (prev.isBlank()) {
                continue;
            }
            Double boost = ruleProperties.getBoosters().get(prev);
            if (boost != null) {
                modifier += Math.abs(boost);
            }
            Double damp = ruleProperties.getDampeners().get(prev);
            if (damp != null) {
                modifier -= Math.abs(damp);
            }
        }
        // Dampeners may reduce a word to nothing but must never flip its sign.
        modifier = Math.max(modifier, 0.0);
        if (context.afterContrastive) {
            modifier *= CONTRASTIVE_WEIGHT;
            if (context.contrastiveCountdown > 0) {
                context.contrastiveCountdown--;
                if (context.contrastiveCountdown == 0) {
                    context.afterContrastive = false;
                }
            }
        }
        return modifier;
    }

    /**
     * Updates negation window after processing a sentiment-bearing token.
     * Negation is switched off once the window reaches zero.
     *
     * @param context sentiment context
     */
    private void updateNegationWindow(SentimentContext context) {
        if (context.negationWindow > 0) {
            context.negationWindow--;
            if (context.negationWindow == 0) {
                context.negationActive = false;
            }
        }
    }

    /**
     * Apply fixed adjustments for multi-word expressions found in raw text.
     * <p>
     * The raw text is lower-cased and each configured phrase is looked up as a substring;
     * a phrase contributes its adjustment once, however often it occurs.
     *
     * @param rawText the original input text
     * @return the cumulative adjustment from recognized phrases
     */
    private double calculatePhraseAdjustments(String rawText) {
        // Locale.ROOT keeps matching independent of the JVM default locale.
        String text = rawText == null ? "" : rawText.toLowerCase(Locale.ROOT);
        double adj = 0.0;
        for (var e : ruleProperties.getPhrases().entrySet()) {
            if (text.contains(e.getKey())) {
                adj += e.getValue();
            }
        }
        return adj;
    }

    /**
     * Context object to track sentiment processing state within a single sentence.
     */
    private static class SentimentContext {
        /** Whether upcoming sentiment words have their valence flipped. */
        boolean negationActive = false;
        /** Remaining number of sentiment words the active negation applies to. */
        int negationWindow = 0;
        /** Index of a token that must not affect negation state, or -1 when there is none. */
        int negationSkipIndex = -1;
        /** Whether a contrastive has been seen and its weighting is in effect. */
        boolean afterContrastive = false;
        /** Remaining number of sentiment words the contrastive weighting applies to. */
        int contrastiveCountdown = 0;
    }

}