from __future__ import annotations

from app.modules.rag.intent_router_v2.anchor_extractor import AnchorExtractor
from app.modules.rag.intent_router_v2.anchor_span_validator import AnchorSpanValidator
from app.modules.rag.intent_router_v2.conversation_anchor_builder import ConversationAnchorBuilder
from app.modules.rag.intent_router_v2.keyword_hint_builder import KeywordHintBuilder
from app.modules.rag.intent_router_v2.keyword_hint_sanitizer import KeywordHintSanitizer
from app.modules.rag.intent_router_v2.models import ConversationState, QueryAnchor, QueryPlan
from app.modules.rag.intent_router_v2.negation_detector import NegationDetector
from app.modules.rag.intent_router_v2.normalization import QueryNormalizer
from app.modules.rag.intent_router_v2.sub_intent_detector import SubIntentDetector
from app.modules.rag.intent_router_v2.test_signals import (
    has_test_focus,
    is_negative_test_request,
    is_test_related_token,
)
from app.modules.rag.intent_router_v2.term_mapping import RuEnTermMapper


class QueryPlanBuilder:
    """Build a ``QueryPlan`` from a raw user query and the conversation state.

    The builder wires together the injected collaborators (normalizer, anchor
    extractor, term mapper, keyword-hint builder/sanitizer, carry-over builder,
    span validator, sub-intent detector and negation detector) and combines
    their results into a single plan: normalized text, sub-intent, negations,
    expansions, keyword hints and anchors.
    """

    # Russian "why"-style markers: "why", "what for", "where from", "because of what".
    _WHY_MARKERS = ("почему", "зачем", "откуда", "из-за чего")
    # Russian follow-up markers: "what next", "next what", "and what now", "continue".
    _NEXT_STEP_MARKERS = ("что дальше", "дальше что", "и что теперь", "продолжай")

    def __init__(
        self,
        normalizer: QueryNormalizer | None = None,
        extractor: AnchorExtractor | None = None,
        mapper: RuEnTermMapper | None = None,
        keyword_hints: KeywordHintBuilder | None = None,
        keyword_hint_sanitizer: KeywordHintSanitizer | None = None,
        carryover: ConversationAnchorBuilder | None = None,
        span_validator: AnchorSpanValidator | None = None,
        sub_intent_detector: SubIntentDetector | None = None,
        negation_detector: NegationDetector | None = None,
    ) -> None:
        """Store the collaborators, creating a default instance for any that is omitted.

        Note that a collaborator is replaced by its default whenever the passed
        value is falsy (``or`` is used), not only when it is ``None``.
        """
        self._normalizer = normalizer or QueryNormalizer()
        self._extractor = extractor or AnchorExtractor()
        self._mapper = mapper or RuEnTermMapper()
        self._keyword_hints_builder = keyword_hints or KeywordHintBuilder()
        self._keyword_hint_sanitizer = keyword_hint_sanitizer or KeywordHintSanitizer()
        self._carryover = carryover or ConversationAnchorBuilder()
        self._span_validator = span_validator or AnchorSpanValidator()
        self._sub_intent_detector = sub_intent_detector or SubIntentDetector()
        self._negation_detector = negation_detector or NegationDetector()

    def build(
        self,
        user_query: str,
        conversation_state: ConversationState,
        continue_mode: bool,
        *,
        conversation_mode: str = "START",
        intent: str = "PROJECT_MISC",
    ) -> QueryPlan:
        """Produce the query plan for ``user_query``.

        Args:
            user_query: Raw user text; ``None``/empty is treated as ``""``.
            conversation_state: State used for anchor carry-over and for the
                DOCS_QA keyword-hint fallback.
            continue_mode: Forwarded to the carry-over builder.
            conversation_mode: Conversation mode label; ``"SWITCH"`` enables the
                extra active-symbol anchor for DOCS_QA (see ``_merge_anchors``).
            intent: Top-level intent label (``"CODE_QA"`` and ``"DOCS_QA"`` get
                special handling here).

        Returns:
            A ``QueryPlan`` with sorted negations and the cleaned anchor list.
        """
        raw = user_query or ""
        normalized = self._normalizer.normalize(raw)
        # Keep the raw text when normalization yields nothing for a non-blank query.
        if not normalized and raw.strip():
            normalized = raw

        negations = self._negation_detector.detect(normalized)
        user_anchors = self._span_validator.sanitize(self._extractor.extract(raw), len(raw))
        has_file_path = any(
            anchor.type == "FILE_PATH" and anchor.source == "user_text" for anchor in user_anchors
        )
        sub_intent = self._sub_intent_detector.detect(
            raw,
            has_file_path=has_file_path,
            negations=negations,
        )
        merged_anchors = self._merge_anchors(
            raw,
            user_anchors,
            conversation_state,
            continue_mode,
            conversation_mode=conversation_mode,
            intent=intent,
        )

        skip_tests = "tests" in negations or is_negative_test_request(raw)
        cleaned_anchors = self._remove_negated_test_terms(skip_tests, merged_anchors)
        sub_intent = self._resolve_sub_intent(
            sub_intent,
            raw,
            cleaned_anchors,
            intent=intent,
            negations=negations,
        )
        # DOCS_QA always resolves to the plain EXPLAIN sub-intent.
        if intent == "DOCS_QA":
            sub_intent = "EXPLAIN"

        expansions = self._expansions(normalized, cleaned_anchors, skip_tests=skip_tests)
        keyword_hints = self._keyword_hints(
            raw,
            normalized,
            cleaned_anchors,
            skip_tests=skip_tests,
            intent=intent,
            state=conversation_state,
        )
        return QueryPlan(
            raw=raw,
            normalized=normalized,
            sub_intent=sub_intent,
            negations=sorted(negations),
            expansions=expansions,
            keyword_hints=keyword_hints,
            anchors=cleaned_anchors,
        )

    def _merge_anchors(
        self,
        raw: str,
        anchors: list[QueryAnchor],
        state: ConversationState,
        continue_mode: bool,
        *,
        conversation_mode: str,
        intent: str,
    ) -> list[QueryAnchor]:
        """Combine user anchors with anchors inherited from the conversation.

        Returns the user anchors followed by the carried-over ones, de-duplicated
        while preserving order.
        """
        has_user_symbol = any(
            anchor.type == "SYMBOL" and anchor.source == "user_text" for anchor in anchors
        )
        has_user_file = any(
            anchor.type == "FILE_PATH" and anchor.source == "user_text" for anchor in anchors
        )
        # Copy the carry-over result: it is extended below and the collaborator
        # may keep a reference to the list it returned.
        inherited = list(
            self._carryover.build(
                raw,
                state,
                continue_mode=continue_mode,
                has_user_symbol=has_user_symbol,
                has_user_file_path=has_user_file,
            )
        )
        # On a switch into DOCS_QA with no explicit user anchor, keep the
        # currently active symbol as a conversation-sourced anchor.
        if (
            conversation_mode == "SWITCH"
            and intent == "DOCS_QA"
            and not has_user_file
            and not has_user_symbol
            and state.active_symbol
        ):
            inherited.append(
                QueryAnchor(
                    type="SYMBOL",
                    value=state.active_symbol,
                    source="conversation_state",
                    span=None,
                    confidence=0.62,
                )
            )
        return self._dedupe(anchors + inherited)

    def _expansions(
        self,
        normalized: str,
        anchors: list[QueryAnchor],
        *,
        skip_tests: bool,
    ) -> list[str]:
        """Return at most 16 expansion terms for the normalized query.

        Starts from the term mapper's expansions, drops ``def``/``class`` when a
        symbol anchor is present, adds test terms when the query has test focus
        (unless tests are negated), appends symbol anchor values, and finally
        strips test-related tokens when ``skip_tests`` is set.
        """
        # Work on a copy so the mapper's own list is never mutated by the appends below.
        values = list(self._mapper.expand(normalized))
        has_symbol = any(anchor.type == "SYMBOL" for anchor in anchors)
        if has_symbol:
            values = [value for value in values if value.lower() not in {"def", "class"}]
        if not skip_tests and has_test_focus(normalized):
            for candidate in ("test", "unit test"):
                if candidate not in values:
                    values.append(candidate)
        for anchor in anchors:
            if anchor.type == "SYMBOL" and anchor.value not in values:
                values.append(anchor.value)
        if skip_tests:
            values = [value for value in values if not is_test_related_token(value)]
        return values[:16]

    def _keyword_hints(
        self,
        raw: str,
        normalized: str,
        anchors: list[QueryAnchor],
        *,
        skip_tests: bool,
        intent: str,
        state: ConversationState,
    ) -> list[str]:
        """Return sanitized keyword hints for retrieval.

        Hints come from the keyword-hint builder plus the values of FILE_PATH and
        SYMBOL anchors, optionally filtered of test-related tokens, and are then
        passed through the sanitizer. For DOCS_QA an empty result falls back to
        the first expansions and the active symbol.
        """
        # Work on a copy so the builder's own list is never mutated by the appends below.
        values = list(self._keyword_hints_builder.build(normalized))
        for anchor in anchors:
            if anchor.type not in {"FILE_PATH", "SYMBOL"}:
                continue
            candidate = anchor.value
            if candidate not in values:
                values.append(candidate)
        if skip_tests:
            values = [value for value in values if not is_test_related_token(value)]

        sanitized = self._keyword_hint_sanitizer.sanitize(raw, anchors, values)
        if intent == "DOCS_QA" and not sanitized:
            # NOTE(review): the active-symbol append and the 5-item cap are
            # treated as part of this DOCS_QA fallback only; confirm that this
            # nesting matches the intended behavior.
            # dict.fromkeys de-duplicates while keeping first-seen order.
            fallback = list(
                dict.fromkeys(self._expansions(normalized, anchors, skip_tests=skip_tests))
            )
            sanitized = fallback[:3]
            if state.active_symbol and state.active_symbol not in sanitized:
                sanitized.append(state.active_symbol)
            sanitized = sanitized[:5]
        return sanitized

    def _remove_negated_test_terms(
        self,
        skip_tests: bool,
        anchors: list[QueryAnchor],
    ) -> list[QueryAnchor]:
        """Drop test-related KEY_TERM/SYMBOL anchors when tests are negated.

        When ``skip_tests`` is false the input list is returned unchanged (same
        object). Anchors of any other type are always kept.
        """
        if not skip_tests:
            return anchors
        return [
            anchor
            for anchor in anchors
            if anchor.type not in {"KEY_TERM", "SYMBOL"}
            or not is_test_related_token(anchor.value)
        ]

    def _dedupe(self, anchors: list[QueryAnchor]) -> list[QueryAnchor]:
        """Remove duplicate anchors, keeping the first occurrence of each.

        Two anchors are duplicates when their ``(type, value, subtype, source)``
        tuples are equal; span and confidence are not part of the key.
        """
        result: list[QueryAnchor] = []
        seen: set[tuple[str, str, str | None, str]] = set()
        for anchor in anchors:
            key = (anchor.type, anchor.value, anchor.subtype, anchor.source)
            if key in seen:
                continue
            seen.add(key)
            result.append(anchor)
        return result

    def _resolve_sub_intent(
        self,
        candidate: str,
        raw: str,
        anchors: list[QueryAnchor],
        *,
        intent: str,
        negations: set[str],
    ) -> str:
        """Possibly narrow an ``EXPLAIN`` sub-intent to ``EXPLAIN_LOCAL``.

        Only applies when ``candidate`` is ``"EXPLAIN"`` and ``intent`` is
        ``"CODE_QA"``; every other combination returns ``candidate`` unchanged.
        """
        if candidate != "EXPLAIN":
            return candidate
        if intent != "CODE_QA":
            return candidate

        # Lower-case and collapse whitespace so multi-word markers match reliably.
        text = " ".join((raw or "").lower().split())
        has_symbol = any(
            anchor.type == "SYMBOL" and anchor.confidence >= 0.6 for anchor in anchors
        )
        has_file = any(
            anchor.type == "FILE_PATH"
            and self._looks_like_file(anchor.value)
            and anchor.confidence >= 0.6
            for anchor in anchors
        )
        has_user_anchor = any(anchor.source == "user_text" for anchor in anchors)
        is_why = any(marker in text for marker in self._WHY_MARKERS)
        is_next_steps = any(marker in text for marker in self._NEXT_STEP_MARKERS)
        # A question of at most four words that ends with "?".
        is_short_generic = len(text.split()) <= 4 and text.endswith("?")

        if (is_why and has_file and has_symbol) or (
            (is_next_steps or is_short_generic) and has_file
        ):
            return "EXPLAIN_LOCAL"
        # Tests negated, nothing anchored by the user in this turn, but a
        # confident file or symbol anchor is present.
        if "tests" in negations and not has_user_anchor and (has_file or has_symbol):
            return "EXPLAIN_LOCAL"
        return candidate

    def _looks_like_file(self, value: str) -> bool:
        """Return True when the last ``/``-separated segment of ``value`` contains a dot."""
        tail = (value or "").rsplit("/", 1)[-1]
        return "." in tail