#!/usr/bin/env python3
"""
starhtml-check — Static analyzer for StarHTML code.
Designed for LLM tool-call loops: minimal tokens, maximum signal.
Usage:
python starhtml_check.py
python starhtml_check.py --code "..."
python starhtml_check.py --summary
python starhtml_check.py --update
"""
import ast
import re
import sys
import argparse
import hashlib
import shutil
from pathlib import Path
from dataclasses import dataclass
from typing import Literal, Optional
# Raw-content URL of the published checker script; downloaded by the
# self-update helpers below.
GITHUB_RAW_URL = "https://raw.githubusercontent.com/renatocaliari/starhtml-skill/main/starhtml_check.py"
def get_checker_location() -> Path:
    """Return the absolute, symlink-resolved path of this script file."""
    script_file = Path(__file__)
    return script_file.resolve()
def is_globally_installed() -> bool:
    """Report whether this script lives under a well-known global bin directory.

    The candidates are ``/usr/local/bin``, ``/usr/bin`` and ``~/.local/bin``;
    candidates that do not exist on disk are ignored.
    """
    script_path = get_checker_location()
    candidate_dirs = (
        Path("/usr/local/bin"),
        Path("/usr/bin"),
        Path.home() / ".local" / "bin",
    )
    for bin_dir in candidate_dirs:
        if bin_dir.exists() and script_path.is_relative_to(bin_dir):
            return True
    return False
def get_latest_checker() -> str:
    """Download the newest checker source from GitHub and return it as text.

    The response body is decoded as UTF-8. On any failure an error line is
    printed and the process exits with status 1.
    """
    try:
        from urllib.request import urlopen

        with urlopen(GITHUB_RAW_URL, timeout=10) as reply:
            raw_body = reply.read()
            return raw_body.decode("utf-8")
    except Exception as fetch_error:
        print(f"ERROR: Failed to fetch latest version from GitHub: {fetch_error}")
        sys.exit(1)
def check_for_update() -> tuple[bool, str]:
    """Compare this script with the copy published on GitHub.

    Both texts are hashed with SHA-256; identical hashes mean no update.

    Returns:
        ``(has_update, message)`` where ``message`` is a human-readable
        status line. When an update exists the message includes the
        difference in line count between the two versions.

    Exits the process with status 1 if the local script cannot be read
    (fetch failures exit inside ``get_latest_checker``).
    """
    current_path = get_checker_location()
    try:
        # Decode explicitly as UTF-8. The remote copy is decoded as UTF-8 as
        # well, so relying on the platform default encoding here would make
        # the hashes differ (or the read fail) on non-UTF-8 locales.
        with open(current_path, "r", encoding="utf-8") as f:
            current_content = f.read()
    except Exception as e:
        print(f"ERROR: Failed to read current checker: {e}")
        sys.exit(1)

    latest_content = get_latest_checker()

    current_hash = hashlib.sha256(current_content.encode("utf-8")).hexdigest()
    latest_hash = hashlib.sha256(latest_content.encode("utf-8")).hexdigest()
    if current_hash == latest_hash:
        return False, "✓ You already have the latest version"

    # Line-count delta gives a rough idea of how much changed.
    diff = len(latest_content.splitlines()) - len(current_content.splitlines())
    diff_str = f"+{diff}" if diff > 0 else str(diff)
    return True, f"Update available ({diff_str} lines)"
def update_checker(interactive: bool = True) -> None:
    """Replace this script with the latest version from GitHub.

    Steps: report the script location, check for an update (exit 0 when
    already current), download the new source, copy the current file to
    ``<name>.bak``, overwrite the script, and mark it executable.

    If the write fails and a backup was created during this run, the backup
    is copied back before exiting with status 1.

    Args:
        interactive: Not used by this function; kept so existing callers
            that pass it continue to work.
    """
    current_path = get_checker_location()
    print(f"Checker location: {current_path}")

    has_update, message = check_for_update()
    print(message)
    if not has_update:
        sys.exit(0)

    print("\nFetching latest version from GitHub...")
    latest_content = get_latest_checker()

    # Back up the current file. Remember whether THIS run produced the
    # backup, so a leftover .bak from an earlier update is never restored.
    backup_path = current_path.with_suffix(current_path.suffix + ".bak")
    backup_created = False
    try:
        shutil.copy2(current_path, backup_path)
    except Exception as e:
        print(f"WARNING: Failed to create backup: {e}")
    else:
        backup_created = True
        print(f"Backup created: {backup_path}")

    # Only the file write is guarded, so a failure in later status output
    # cannot trigger a rollback of a successful update.
    try:
        # Write as UTF-8: the downloaded text contains non-ASCII characters
        # that the platform default encoding may be unable to encode.
        with open(current_path, "w", encoding="utf-8") as f:
            f.write(latest_content)
    except Exception as e:
        print(f"ERROR: Failed to write new version: {e}")
        if backup_created and backup_path.exists():
            try:
                shutil.copy2(backup_path, current_path)
                print("Restored backup successfully")
            except Exception as restore_error:
                print(f"ERROR: Failed to restore backup: {restore_error}")
        sys.exit(1)

    print(f"✓ Updated {current_path}")

    # Make executable (Unix-like systems). Best effort: a failure here does
    # not invalidate the update, so it is deliberately ignored.
    try:
        current_path.chmod(current_path.stat().st_mode | 0o111)
    except Exception:
        pass

    print("\n✓ Update complete! Run 'starhtml_check --update' again to check for future updates.")
@dataclass
class Issue:
    """One finding reported by the checker.

    ``str(issue)`` renders a header line with the line number, rule code and
    message, followed by an optional ``GOT:`` line (the stripped offending
    source text) and an optional ``FIX:`` section (one output line per line
    of ``fix``).
    """

    level: Literal["ERROR", "WARNING"]
    line: int
    code: str
    message: str
    original: str = ""
    fix: str = ""

    def __str__(self):
        rendered = [f" L{self.line} [{self.code}] {self.message}"]
        if self.original:
            rendered.append(f" GOT: {self.original.strip()}")
        if self.fix:
            # Only the first fix line carries the "FIX:" label.
            first_line, *more_lines = self.fix.strip().split("\n")
            rendered.append(" FIX: " + first_line)
            rendered.extend(" " + extra for extra in more_lines)
        return "\n".join(rendered)
# Keyword names treated as reactive attributes: an f-string value here is
# reported because it is evaluated once in Python instead of staying reactive.
REACTIVE_FSTRING_ATTRS = {
    "data_text", "data_html", "data_value", "data_href", "data_src",
    "data_style_left", "data_style_top", "data_style_width", "data_style_height",
    "data_style_opacity", "data_style_transform", "data_style_background",
}

# Any keyword starting with one of these prefixes is also treated as reactive.
REACTIVE_PREFIXES = ("data_on_", "data_class_", "data_style_", "data_attr_", "data_bind")

# Function names recognised as HTTP action helpers.
HTTP_ACTIONS = {"get", "post", "put", "patch", "delete"}

# Plugin-specific data attributes: plugin name -> keyword names it provides.
PLUGIN_DATA_ATTRS = {
    "persist": {"data_persist"},
    "scroll": {"data_scroll", "data_scroll_into_view"},
    "resize": {"data_resize"},
    "drag": {"data_drag", "data_drop_zone"},
    "canvas": {"data_canvas"},
    "position": {"data_position"},
    "motion": {
        "data_motion", "data_motion_enter", "data_motion_exit",
        "data_motion_hover", "data_motion_press", "data_motion_in_view",
        "data_motion_scroll_link", "data_on_motion_start",
        "data_on_motion_complete", "data_on_motion_cancel",
    },
    "markdown": {"data_markdown"},
    "katex": {"data_katex"},
    "mermaid": {"data_mermaid"},
    "split": {"data_split"},
}

# Matches a Tailwind sizing utility inside a class string.
# - The lookbehind requires the utility to start a class token (or follow a
#   variant prefix such as "md:"), so "shadow-2xl" or "grow-0" no longer
#   count as "w-2" / "w-0".
# - An optional "min-" / "max-" prefix keeps "max-w-4" and friends matching.
# - Bracketed arbitrary values such as "w-[18px]" are accepted.
# Group 1 is the utility prefix, group 2 the value.
TAILWIND_SIZE_PATTERN = re.compile(
    r"(?<![\w-])(?:(?:min|max)-)?(size-|w-|h-)"
    r"(\d+|\d+/\d+|full|screen|min|max|px|auto|fit|\[[^\]]+\])"
)
class StarHTMLAnalyzer(ast.NodeVisitor):
    """AST visitor that collects StarHTML findings and usage facts for one file.

    Call ``visit(ast.parse(source))`` on an instance built from the same
    ``source``. Findings are appended to ``issues``; the remaining attributes
    record signals, events, plugin attributes and similar facts that
    whole-file checks read after the walk.
    """

    # Element constructors whose presence marks a function as an HTML component.
    _COMPONENT_TAGS = frozenset({
        "Div", "Span", "Button", "Input", "Form", "Label",
        "Select", "Textarea", "Ul", "Ol", "Li", "Table", "Tr",
        "Td", "Th", "H1", "H2", "H3", "H4", "H5", "H6", "P",
        "A", "Img", "Canvas", "Svg", "Nav", "Header", "Footer",
        "Main", "Section", "Article", "Aside",
    })
    # Constructors counted when measuring element nesting depth.
    _NESTING_TAGS = _COMPONENT_TAGS | {"Card", "Modal"}
    # Elements nested deeper than this are recorded as deep-nesting locations.
    _DEEP_NESTING_THRESHOLD = 5
    # Elements exempt from the data_show flash-prevention rule (E009).
    _FLASH_EXEMPT_TAGS = frozenset({"Input", "Form", "Select", "Textarea", "Script"})

    def __init__(self, source: str):
        self.lines = source.splitlines()
        self.issues: list[Issue] = []
        self.signals: list[str] = []
        self.events: list[str] = []
        self.reactive_attrs: list[str] = []
        self._seen_signals: set[str] = set()
        self._defined_signals: set[str] = set()  # Signals defined so far
        self._used_signals: dict[int, tuple[str, str]] = {}  # lineno -> (signal_name, attr)
        self._has_f_import = False
        self._uses_f_helper: list[int] = []
        self._sse_functions: list[tuple[str, int]] = []  # (function name, lineno)
        self._sse_has_yield_signals: set[str] = set()
        self._current_func: str = ""
        # Plugin tracking
        self._registered_plugins: set[str] = set()
        self._used_plugin_attrs: dict[int, tuple[str, str]] = {}  # lineno -> (attr, plugin_name)
        # Track f() usage with signal count for I003
        self._f_helper_usage: list[tuple[int, int]] = []  # (lineno, signal_count)
        # Track .then() calls for W023
        self._then_calls: list[int] = []  # lineno
        # Track data_effect for W024
        self._data_effect_usage: list[int] = []  # lineno
        # Track switch/collect usage for W021/W022
        self._switch_usage: list[int] = []  # lineno
        self._collect_usage: list[int] = []  # lineno
        # Track signal operators for I004
        self._and_chains: list[int] = []  # lineno
        # Track component functions for W025
        self._component_functions: list[tuple[str, int, bool]] = []  # (name, lineno, has_kwargs)
        # Track js() usage for W030 (LoB violations)
        self._js_calls: list[tuple[int, str]] = []  # (lineno, js_code)
        # Track deep nesting in components
        self._max_nesting_depth: int = 0
        self._deep_nesting_locations: list[tuple[int, int]] = []  # (lineno, depth)
        # Track signals used in backend (HTTP actions) for W029
        self._backend_signals: set[str] = set()
        self._all_signal_definitions: dict[str, int] = {}  # signal_name -> lineno

    # ------------------------------------------------------------------ helpers

    def _report(self, level: str, line: int, code: str, message: str, fix: str = "") -> None:
        """Append an Issue for ``line``, attaching that source line as ``original``."""
        self.issues.append(Issue(
            level=level,
            line=line,
            code=code,
            message=message,
            original=self._get_line(line),
            fix=fix,
        ))

    def _register_signal(self, sig_name: str, lineno: int) -> None:
        """Record a ``name = Signal(...)`` / ``(name := Signal(...))`` definition."""
        if sig_name not in self._seen_signals:
            self.signals.append(sig_name)
            self._seen_signals.add(sig_name)
        self._defined_signals.add(sig_name)
        # A later definition of the same name overwrites the recorded line.
        self._all_signal_definitions[sig_name] = lineno

    @staticmethod
    def _is_signal_call(value: ast.AST) -> bool:
        """Return True for a direct ``Signal(...)`` call expression."""
        return (
            isinstance(value, ast.Call)
            and isinstance(value.func, ast.Name)
            and value.func.id == "Signal"
        )

    @staticmethod
    def _prevents_flash(kw: ast.keyword) -> bool:
        """Return True if ``kw`` counts as flash prevention for ``data_show``."""
        if kw.arg in ("data_class_hidden", "data_style_opacity"):
            return True
        if not isinstance(kw.value, ast.Constant):
            return False
        text = str(kw.value.value).lower()
        if kw.arg == "style":
            return "display" in text
        if kw.arg == "data_style_display":
            return text == "none"
        if kw.arg == "cls":
            return "hidden" in text
        return False

    @staticmethod
    def _signal_reference(value: ast.AST) -> Optional[str]:
        """Return the leading name of a simple signal expression, else None.

        Recognised shapes: ``sig``, ``sig.attr``, ``sig <cmp> ...`` and a
        unary operator applied to ``sig`` (for example ``~sig``).
        """
        if isinstance(value, ast.Name):
            return value.id
        if isinstance(value, ast.Attribute):
            return value.value.id if isinstance(value.value, ast.Name) else None
        if isinstance(value, ast.Compare):
            return value.left.id if isinstance(value.left, ast.Name) else None
        if isinstance(value, ast.UnaryOp):
            return value.operand.id if isinstance(value.operand, ast.Name) else None
        return None

    # ----------------------------------------------------------------- visitors

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Track ``f`` and plugin imports; flag direct Datastar imports (E020)."""
        if node.module == "starhtml.datastar":
            if any(alias.name == "f" for alias in node.names):
                self._has_f_import = True
        # Track plugin imports: from starhtml.plugins import persist, scroll, etc.
        if node.module == "starhtml.plugins":
            for alias in node.names:
                self._registered_plugins.add(alias.name)
        # E020: Direct Datastar import — StarHTML manages Datastar automatically
        if node.module and "datastar" in node.module and "starhtml" not in node.module:
            self._report(
                "ERROR", node.lineno, "E020",
                "Direct Datastar import — StarHTML manages Datastar automatically, do not import",
                fix="Remove: StarHTML includes Datastar. Use: from starhtml import *",
            )
        self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Record SSE handlers and HTML component functions, then descend."""
        enclosing_func = self._current_func
        self._current_func = node.name

        # Register SSE decorators first so the component check below can see
        # that THIS function is an SSE handler.
        # Handles @sse, @app.sse, @starhtml.sse, etc.
        for decorator in node.decorator_list:
            is_sse = (
                (isinstance(decorator, ast.Name) and decorator.id == "sse")
                or (isinstance(decorator, ast.Attribute) and decorator.attr == "sse")
            )
            if is_sse:
                self._sse_functions.append((node.name, node.lineno))

        # **kwargs presence (for W025 - component functions). ``ast`` has no
        # ``kwarg`` node class; the **kwargs parameter is ``arguments.kwarg``.
        has_kwargs = node.args.kwarg is not None

        # Heuristic: a function that calls a known element constructor
        # anywhere in its body is assumed to return HTML.
        returns_html = any(
            isinstance(child, ast.Call)
            and isinstance(child.func, ast.Name)
            and child.func.id in self._COMPONENT_TAGS
            for child in ast.walk(node)
        )
        is_sse_handler = any(name == node.name for name, _ in self._sse_functions)
        has_yield = any(isinstance(child, ast.Yield) for child in ast.walk(node))
        is_utility = node.name.startswith("_") or (
            "todo" in node.name.lower() and "render" in node.name.lower()
        )
        if returns_html and not is_sse_handler and not has_yield and not is_utility:
            self._component_functions.append((node.name, node.lineno, has_kwargs))
            # Records deep-nesting locations and the maximum depth seen.
            self._calculate_nesting_depth(node)

        self.generic_visit(node)
        # Restore (not clear) so code after a nested def is still attributed
        # to the enclosing function.
        self._current_func = enclosing_func

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Treat ``async def`` exactly like ``def``."""
        self.visit_FunctionDef(node)

    def visit_Call(self, node: ast.Call):
        """Run every per-call rule and usage tracker, then descend."""
        if isinstance(node.func, ast.Name):
            func_name = node.func.id
        elif isinstance(node.func, ast.Attribute):
            func_name = node.func.attr
        else:
            func_name = ""
        # Bare-name call (``get(...)``) as opposed to a method call (``data.get(...)``).
        is_plain_call = isinstance(node.func, ast.Name)
        line = node.lineno
        # ``**mapping`` entries have ``arg is None`` and match no rule below.
        keywords = [kw for kw in node.keywords if kw.arg]

        # E018: len(signal) — Signals don't support len()
        if func_name == "len" and node.args and isinstance(node.args[0], ast.Name):
            arg_name = node.args[0].id
            if arg_name in self._defined_signals:
                self._report(
                    "ERROR", line, "E018",
                    f"`len({arg_name})` — Signals don't support len(); use Python variable for data",
                    fix=f"Store data in Python: {arg_name}_data = [] # then len({arg_name}_data)",
                )

        # E019: signals() with positional arguments. A literal True/False in
        # first position (only_if_missing) is the one accepted positional.
        if func_name == "signals":
            offending = [
                arg for index, arg in enumerate(node.args)
                if not (index == 0 and isinstance(arg, ast.Constant) and isinstance(arg.value, bool))
            ]
            if offending:
                self._report(
                    "ERROR", line, "E019",
                    "`signals()` with positional arguments — use keyword arguments only",
                    fix="Use kwargs: yield signals(count=1, status='done') # NOT signals(count, status)",
                )

        # E001 (positional arg after keyword) is a SyntaxError: such source
        # never parses, so it cannot be detected at the AST level.

        # E002: f-string in reactive attribute
        for kw in keywords:
            is_reactive = kw.arg in REACTIVE_FSTRING_ATTRS or kw.arg.startswith(REACTIVE_PREFIXES)
            if is_reactive and isinstance(kw.value, ast.JoinedStr):
                self._report(
                    "ERROR", kw.lineno, "E002",
                    "f-string in reactive attribute — static, won't update in browser",
                    fix='Use + operator: "Label: " + signal\n'
                        'Or f() helper: f("Label: {s}", s=signal) for 3+ signals',
                )

        # E003: f-string URL in HTTP action. Method calls such as data.get()
        # are dict-style methods, not HTTP actions, and are skipped. Only
        # f-strings that interpolate a known Signal are reported.
        if (func_name in HTTP_ACTIONS and is_plain_call
                and node.args and isinstance(node.args[0], ast.JoinedStr)):
            fstring_names = self._extract_fstring_variables(node.args[0])
            if any(name in self._defined_signals for name in fstring_names):
                self._report(
                    "ERROR", line, "E003",
                    "f-string URL in HTTP action — signal value is static, won't update in browser",
                    fix='Pass signal as parameter: get("/api/item", id=item_id_sig)',
                )

        # E004: special chars in data_class_* keyword name.
        # NOTE(review): a parsed keyword name is always a valid identifier,
        # so this branch looks unreachable for real sources — confirm.
        for kw in keywords:
            if kw.arg.startswith("data_class_"):
                suffix = kw.arg[len("data_class_"):]
                if any(c in suffix for c in ":/[\\]"):
                    self._report(
                        "ERROR", kw.lineno, "E004",
                        "special chars in `data_class_*` keyword name — Python parse error",
                        fix='Use data_attr_class: data_attr_class=sig.if_("hover:bg-blue-500", "")',
                    )

        # E007: data_attr_class and data_attr_cls on same element
        kw_names = {kw.arg for kw in keywords}
        if "data_attr_class" in kw_names and "data_attr_cls" in kw_names:
            self._report(
                "ERROR", line, "E007",
                "`data_attr_class` and `data_attr_cls` on same element — different behaviors",
                fix="Use only one: data_attr_class replaces, data_attr_cls adds to base cls=",
            )

        # E009: data_show without flash prevention (UX bug)
        if "data_show" in kw_names:
            has_flash_prevention = any(self._prevents_flash(kw) for kw in keywords)
            if not has_flash_prevention and func_name not in self._FLASH_EXEMPT_TAGS:
                self._report(
                    "ERROR", line, "E009",
                    "`data_show` without flash prevention — element flashes visible before JS loads",
                    fix='Add style="display:none": Div("content", style="display:none", data_show=is_open)',
                )

        # E011: data_on_scroll without throttle or data_on_input without debounce
        for kw in keywords:
            if kw.arg == "data_on_scroll" and not self._has_modifier(kw.value, "throttle"):
                self._report(
                    "ERROR", kw.lineno, "E011",
                    "`data_on_scroll` without throttle — performance bug",
                    fix='Add throttle: data_on_scroll=(handler, {"throttle": 16})',
                )
            if kw.arg == "data_on_input" and not self._has_modifier(kw.value, "debounce"):
                self._report(
                    "ERROR", kw.lineno, "E011",
                    "`data_on_input` without debounce — performance bug",
                    fix='Add debounce: data_on_input=(handler, {"debounce": 300})',
                )

        # E013: Icon() without explicit size (layout bug)
        if func_name == "Icon":
            has_size = any(
                kw.arg in ("size", "width", "height")
                or (
                    kw.arg == "cls"
                    and isinstance(kw.value, ast.Constant)
                    and TAILWIND_SIZE_PATTERN.search(str(kw.value.value))
                )
                for kw in keywords
            )
            if not has_size:
                self._report(
                    "ERROR", line, "E013",
                    "`Icon()` without explicit size — inherits 1em from font-size (layout issue)",
                    fix='Add size: Icon("lucide:home", size=24)',
                )

        # E014: js() raw JavaScript (security risk)
        if func_name == "js":
            self._report(
                "ERROR", line, "E014",
                "`js()` raw JavaScript — potential security risk with user input",
                fix="Use signal references: (item := Signal('item', val)); js('doSomething($item)')",
            )
            # Track js() calls for W030 (LoB violations)
            self._js_calls.append((line, self._get_line(line)))

        # Track signals used in HTTP actions (for W029 - frontend-only signals)
        if func_name in HTTP_ACTIONS:
            for kw in keywords:
                if isinstance(kw.value, ast.Name):
                    self._backend_signals.add(kw.value.id)

        # W017: Computed Signal (expression as initial value, auto-updates).
        # Plain references (names, subscripts, attributes) and literals are
        # not computed.
        if func_name == "Signal" and len(node.args) >= 2:
            initial = node.args[1]
            # ``-1`` / ``+1`` parse as UnaryOp over a Constant but are plain
            # literals, not reactive expressions.
            is_signed_literal = (
                isinstance(initial, ast.UnaryOp)
                and isinstance(initial.op, (ast.USub, ast.UAdd))
                and isinstance(initial.operand, ast.Constant)
            )
            is_computed = isinstance(initial, (
                ast.BinOp,    # price * quantity
                ast.BoolOp,   # a and b and c
                ast.UnaryOp,  # ~visible
                ast.Call,     # all(a, b, c), format(x, y)
            ))
            if is_computed and not is_signed_literal:
                self._report(
                    "WARNING", line, "W017",
                    "Computed Signal detected (expression as initial value, auto-updates)",
                )

        # W018: _ref_only=True
        for kw in keywords:
            if kw.arg == "_ref_only" and isinstance(kw.value, ast.Constant) and kw.value.value is True:
                self._report(
                    "WARNING", line, "W018",
                    "`_ref_only=True` Signal — correctly excluded from `data-signals` HTML output",
                )

        # W015: delete() HTTP action without confirmation (UX risk). Method
        # calls (obj.delete(...)) are skipped, consistent with E003. The
        # confirmation heuristic only inspects the text of the call's line.
        if func_name == "delete" and is_plain_call:
            line_text = self._get_line(line).lower()
            has_confirmation_pattern = (
                "alertdialog" in line_text
                or "alert_dialog" in line_text
                or "confirm" in line_text
            )
            if not has_confirmation_pattern:
                self._report(
                    "WARNING", line, "W015",
                    "`delete()` HTTP action — verify confirmation UX exists (AlertDialog, confirm dialog, etc.)",
                )

        # W019: f-string in elements() selector
        if func_name == "elements":
            if len(node.args) >= 2 and isinstance(node.args[1], ast.JoinedStr):
                self._report(
                    "WARNING", line, "W019",
                    "f-string in elements() selector — verify selector is static or use signal concatenation",
                    fix='If dynamic: elements(content, "#target-" + id_sig)\n'
                        'If static: elements(content, "#todo-123") # OK',
                )

        # Track plugin data attributes usage (first plugin that owns the name).
        for kw in keywords:
            for plugin_name, attrs in PLUGIN_DATA_ATTRS.items():
                if kw.arg in attrs:
                    self._used_plugin_attrs[kw.lineno] = (kw.arg, plugin_name)
                    break

        # Track f() usage; keyword count stands in for signal count (I003).
        if func_name == "f":
            self._uses_f_helper.append(line)
            self._f_helper_usage.append((line, len(node.keywords)))

        # Track switch() and collect() usage for W021/W022
        if func_name == "switch":
            self._switch_usage.append(line)
        if func_name == "collect":
            self._collect_usage.append(line)

        # Track .then() calls for W023
        if isinstance(node.func, ast.Attribute) and node.func.attr == "then":
            self._then_calls.append(line)

        # Track data_effect for W024
        for kw in keywords:
            if kw.arg == "data_effect":
                self._data_effect_usage.append(line)

        # Track SSE functions that contain a signals(...) call.
        if func_name == "signals" and self._current_func:
            if any(sse_name == self._current_func for sse_name, _ in self._sse_functions):
                self._sse_has_yield_signals.add(self._current_func)

        # Track signals used in data_* attributes (W016: undefined signals).
        # Keyed by line, so a later attribute on the same line replaces an
        # earlier one.
        for kw in keywords:
            if kw.arg.startswith("data_"):
                referenced = self._signal_reference(kw.value)
                if referenced is not None:
                    self._used_signals[kw.lineno] = (referenced, kw.arg)

        # Collect events and reactive attrs
        for kw in keywords:
            if kw.arg.startswith("data_on_"):
                self.events.append(f"{kw.arg}(L{kw.lineno})")
            elif kw.arg.startswith("data_"):
                self.reactive_attrs.append(kw.arg)

        self.generic_visit(node)

    def visit_NamedExpr(self, node: ast.NamedExpr):
        """Register ``(name := Signal(...))`` definitions."""
        if self._is_signal_call(node.value) and isinstance(node.target, ast.Name):
            self._register_signal(node.target.id, node.lineno)
        self.generic_visit(node)

    def visit_Assign(self, node: ast.Assign):
        """Register ``name = Signal(...)`` definitions (every plain-name target)."""
        if self._is_signal_call(node.value):
            for target in node.targets:
                if isinstance(target, ast.Name):
                    self._register_signal(target.id, node.lineno)
        self.generic_visit(node)

    def visit_BinOp(self, node: ast.BinOp):
        """Record the line of every ``&`` (BitAnd) operation."""
        if isinstance(node.op, ast.BitAnd):
            self._and_chains.append(node.lineno)
        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute):
        """E017: flag ``<signal>.value`` on a known Signal name."""
        if (node.attr == "value"
                and isinstance(node.value, ast.Name)
                and node.value.id in self._defined_signals):
            sig_name = node.value.id
            self._report(
                "ERROR", node.lineno, "E017",
                f"`{sig_name}.value` — Signals don't have .value attribute; use Python variables for data",
                fix=f"Store data in Python: {sig_name}_data = [] # not Signal",
            )
        self.generic_visit(node)

    def visit_If(self, node: ast.If):
        """W031: Signal used as the condition of an ``if`` statement."""
        self._check_signal_as_boolean(node.test, node.lineno)
        self.generic_visit(node)

    def visit_IfExp(self, node: ast.IfExp):
        """W031: Signal used as the condition of a ternary expression."""
        self._check_signal_as_boolean(node.test, node.lineno)
        self.generic_visit(node)

    def visit_While(self, node: ast.While):
        """W031: Signal used as the condition of a ``while`` loop."""
        self._check_signal_as_boolean(node.test, node.lineno)
        self.generic_visit(node)

    # --------------------------------------------------------------- utilities

    def _check_signal_as_boolean(self, test_node: ast.AST, lineno: int) -> None:
        """Report W031 when a known Signal is used as a Python truth value.

        Handles ``sig``, ``not sig`` and any ``and`` / ``or`` combination of
        those; other expression shapes are ignored.
        """
        # Boolean ops: if is_saving and is_valid:
        if isinstance(test_node, ast.BoolOp):
            for operand in test_node.values:
                self._check_signal_as_boolean(operand, lineno)
            return

        if isinstance(test_node, ast.Name):
            sig_name, negated = test_node.id, False
        elif (isinstance(test_node, ast.UnaryOp)
              and isinstance(test_node.op, ast.Not)
              and isinstance(test_node.operand, ast.Name)):
            sig_name, negated = test_node.operand.id, True
        else:
            return

        if sig_name not in self._defined_signals:
            return
        if negated:
            fix = f'Use reactive attribute: data_show=~{sig_name} or data_text=~{sig_name}.if_("false", "true")'
        else:
            fix = f'Use reactive attribute: data_show={sig_name} or data_text={sig_name}.if_("true", "false")'
        self._report(
            "WARNING", lineno, "W031",
            f"Signal `{sig_name}` used as Python boolean — Signals aren't data containers",
            fix=fix,
        )

    def _calculate_nesting_depth(self, node: ast.AST, current_depth: int = 0, max_depth: int = 10) -> int:
        """Return the deepest element nesting found under ``node``.

        Every call to a known element constructor adds one level. Elements
        deeper than the warning threshold are appended to
        ``_deep_nesting_locations`` and ``_max_nesting_depth`` is raised to
        the deepest level seen. Recursion stops once ``current_depth``
        exceeds ``max_depth``.
        """
        if current_depth > max_depth:
            return current_depth

        if (isinstance(node, ast.Call)
                and isinstance(node.func, ast.Name)
                and node.func.id in self._NESTING_TAGS):
            current_depth += 1
            self._max_nesting_depth = max(self._max_nesting_depth, current_depth)
            if current_depth > self._DEEP_NESTING_THRESHOLD:
                self._deep_nesting_locations.append((node.lineno, current_depth))

        deepest = current_depth
        for child in ast.iter_child_nodes(node):
            deepest = max(deepest, self._calculate_nesting_depth(child, current_depth, max_depth))
        return deepest

    def _get_line(self, lineno: int) -> str:
        """Return the 1-based source line ``lineno``, or "" when out of range."""
        if 1 <= lineno <= len(self.lines):
            return self.lines[lineno - 1]
        return ""

    def _has_modifier(self, value: ast.AST, modifier: str) -> bool:
        """Return True if ``value`` is ``(handler, modifiers)`` naming ``modifier``.

        ``modifiers`` may be a dict literal with a matching constant key or a
        ``dict(...)`` call with a matching keyword.
        """
        if not (isinstance(value, ast.Tuple) and len(value.elts) >= 2):
            return False
        second = value.elts[1]
        if isinstance(second, ast.Dict):
            return any(
                isinstance(key, ast.Constant) and key.value == modifier
                for key in second.keys
            )
        if (isinstance(second, ast.Call)
                and isinstance(second.func, ast.Name)
                and second.func.id == "dict"):
            return any(kw.arg == modifier for kw in second.keywords)
        return False

    def _extract_fstring_variables(self, node: ast.JoinedStr) -> list[str]:
        """Return every variable name referenced inside an f-string.

        Placeholders are processed in source order. All names inside each
        placeholder are collected, including those in nested expressions
        (``{a.b.c}``, ``{x + 1}``, ``{str(x)}``) and format specs.
        """
        var_names: list[str] = []
        for part in node.values:
            if isinstance(part, ast.FormattedValue):
                var_names.extend(
                    inner.id for inner in ast.walk(part) if isinstance(inner, ast.Name)
                )
        return var_names
def check_regex(source: str, issues: list[Issue], lines: list[str]) -> None:
"""Regex-based checks that complement AST analysis."""
# E020: Direct Datastar CDN script — StarHTML manages Datastar automatically
datastar_cdn_pattern = re.compile(r'@getdatastar/datastar|datastar.*\.min\.js')
for i, line in enumerate(lines, 1):
if datastar_cdn_pattern.search(line):
issues.append(Issue(
level="ERROR",
line=i,
code="E020",
message="Direct Datastar CDN — StarHTML manages Datastar automatically, do not add manually",
original=line.strip(),
fix="Remove: StarHTML includes Datastar. Use: from starhtml import *"
))
# E005: camelCase Signal name (includes PascalCase and camelCase)
signal_name_pattern = re.compile(r'Signal\s*\(\s*["\']([a-zA-Z_][a-zA-Z0-9_]*)["\']')
for i, line in enumerate(lines, 1):
match = signal_name_pattern.search(line)
if match:
name = match.group(1)
# Detect camelCase (lowerUpper) or PascalCase (UpperUpper like XMLParser)
# but allow snake_case (lower_lower) and _underscore_prefix
has_camel = bool(re.search(r"[a-z][A-Z]", name)) # lowerUpper
is_pascal_case = bool(re.match(r"^[A-Z][a-zA-Z0-9]*$", name)) # PascalCase puro
is_snake_case = "_" in name and name.islower() # snake_case
is_underscore_prefix = name.startswith("_") and (name[1:].islower() or "_" in name[1:])
if (has_camel or is_pascal_case) and not is_snake_case and not is_underscore_prefix:
snake_case = re.sub(r"([a-z])([A-Z])", r"\1_\2", name).lower()
issues.append(Issue(
level="ERROR",
line=i,
code="E005",
message="camelCase Signal name — must be snake_case",
original=line.strip(),
fix=f'Rename to snake_case: Signal("{snake_case}", ...)'
))
# W012: Empty Signal name
empty_signal_pattern = re.compile(r'Signal\s*\(\s*["\']["\']')
for i, line in enumerate(lines, 1):
if empty_signal_pattern.search(line):
issues.append(Issue(
level="WARNING",
line=i,
code="W012",
message="Signal with empty name — use descriptive snake_case names",
original=line.strip(),
fix='Signal("counter", 0) instead of Signal("", 0)'
))
# E008: walrus := without outer parens (BREAKS reactivity - Signal not passed)
for i, line in enumerate(lines, 1):
stripped = line.lstrip()
if ":= Signal(" in line and not stripped.startswith("("):
issues.append(Issue(
level="ERROR",
line=i,
code="E008",
message="walrus `:=` Signal without outer parentheses — won't register as positional arg, breaks reactivity",
original=line.strip(),
fix="Wrap in parens: (name := Signal(\"name\", \"\"))"
))
# E010: form submit without is_valid guard (functional bug)
for i, line in enumerate(lines, 1):
if "data_on_submit" in line and "post(" in line:
has_guard = any(x in line for x in ["is_valid", ".then(", "if_("])
if not has_guard:
issues.append(Issue(
level="ERROR",
line=i,
code="E010",
message="form submit fires `post()` without `is_valid` guard — submits invalid data",
original=line.strip(),
fix="Add guard: is_valid.then(post(\"/api/save\"))"
))
# W008: Signal name too short
short_signal_pattern = re.compile(r'Signal\s*\(\s*["\']([a-z_]{1,2})["\']')
for i, line in enumerate(lines, 1):
match = short_signal_pattern.search(line)
if match:
name = match.group(1)
# Count non-underscore chars
useful_chars = len([c for c in name if c != "_"])
if useful_chars <= 1:
issues.append(Issue(
level="WARNING",
line=i,
code="W008",
message="Signal name too short — prefer descriptive snake_case names",
original=line.strip(),
fix='Use descriptive name: Signal("counter", 0) instead of Signal("x", 0)'
))
# W020: elements() replace-mode — check if element has id matching selector
# Only warn if element does NOT have explicit id or selector doesn't match
for i, line in enumerate(lines, 1):
# Skip docstrings and comment lines
stripped = line.strip()
if stripped.startswith('"""') or stripped.startswith("'''") or stripped.startswith("#"):
continue
if "elements(" in line:
# Check if this line or next few lines have append/prepend
context_lines = "\n".join(lines[i-1:min(i+3, len(lines))])
has_append_prepend = any(x in context_lines for x in ["\"append\"", "\"prepend\"", "'append'", "'prepend'"])
if has_append_prepend:
continue # append/prepend mode doesn't need id matching
# Check if element has explicit id attribute
# Pattern 1: Div(id="...", ...) or similar with literal id
has_explicit_id = bool(re.search(r'elements\s*\(\s*\w+\s*\([^)]*id\s*=\s*["\'][^"\']+["\']', line))
# Pattern 2: id=f"..." with f-string (dynamic but valid)
has_explicit_id = has_explicit_id or bool(re.search(r'elements\s*\(\s*\w+\s*\([^)]*id\s*=\s*f["\'][^"\']*["\']', line))
# Pattern 3: id=... with string concatenation (e.g., "#todo-" + str(id))
has_explicit_id = has_explicit_id or bool(re.search(r'elements\s*\(\s*\w+\s*\([^)]*id\s*=\s*[^,)]*\+', line))
# Pattern 4: element is a variable (function return or variable reference)
# e.g., elements(todo_element, "...") or elements(render_todo(todo), "...")
# In this case, assume developer knows what they're doing
has_variable_element = bool(re.search(r'elements\s*\(\s*[a-z_][a-z0-9_]*\s*\(', line, re.IGNORECASE))
has_variable_element = has_variable_element or bool(re.search(r'elements\s*\(\s*[a-z_][a-z0-9_]*\s*,', line, re.IGNORECASE))
if not has_explicit_id and not has_variable_element:
issues.append(Issue(
level="WARNING",
line=i,
code="W020",
message="`elements()` replace-mode — ensure returned element preserves `id` for future targeting",
original=line.strip(),
fix="Add id to element: elements(Div(id=\"target\", ...), \"#target\")"
))
def check_post(analyzer: StarHTMLAnalyzer, issues: list[Issue]) -> None:
    """Run the post-AST checks that need whole-file context.

    Reads the facts collected on ``analyzer`` and appends one ``Issue`` per
    finding to ``issues``. The list is mutated in place; nothing is returned.

    Args:
        analyzer: Analyzer instance whose collected attributes (signals,
            plugin usage, helper calls, source lines, ...) are inspected.
        issues: List that receives the new findings.
    """
    # E006: f() used without import
    if analyzer._uses_f_helper and not analyzer._has_f_import:
        for lineno in analyzer._uses_f_helper:
            issues.append(Issue(
                level="ERROR",
                line=lineno,
                code="E006",
                message="`f()` helper used without import — NameError at runtime",
                original=analyzer._get_line(lineno),
                fix="Add import: from starhtml.datastar import f"
            ))

    # E012: @sse function without yield signals (state cleanup bug)
    for func_name, lineno in analyzer._sse_functions:
        if func_name not in analyzer._sse_has_yield_signals:
            issues.append(Issue(
                level="ERROR",
                line=lineno,
                code="E012",
                message=f"`@sse` function `{func_name}` missing `yield signals()` reset — client state not cleaned up",
                original=f"def {func_name}(): ...",
                fix="Add at end: yield signals(is_sending=False, message=\"\")"
            ))

    # W016: Signal used but not defined (runtime error)
    # Python builtins and common names are never reported as missing signals.
    builtin_names = {"True", "False", "None", "print", "len", "str", "int", "float", "list", "dict"}
    for lineno, (sig_name, attr) in analyzer._used_signals.items():
        if sig_name in analyzer._defined_signals or sig_name in builtin_names:
            continue
        issues.append(Issue(
            level="WARNING",
            line=lineno,
            code="W016",
            message=f"Signal `{sig_name}` used in `{attr}` but never defined — will cause runtime error",
            original=analyzer._get_line(lineno),
            fix=f'Define signal: ({sig_name} := Signal("{sig_name}", 0))'
        ))

    # E015: Plugin data attribute used without plugin import/registration
    for lineno, (attr, plugin_name) in analyzer._used_plugin_attrs.items():
        if plugin_name not in analyzer._registered_plugins:
            issues.append(Issue(
                level="ERROR",
                line=lineno,
                code="E015",
                message=f"`{attr}` requires plugin `{plugin_name}` — import and register it",
                original=analyzer._get_line(lineno),
                fix=f'Add: from starhtml.plugins import {plugin_name}\n'
                    f'Then: app.register({plugin_name})'
            ))

    # E016: data_on_submit with post() but without {"prevent": True}
    for lineno, line in enumerate(analyzer.lines, 1):
        if "data_on_submit" in line and "post(" in line:
            if '{"prevent": True}' not in line and "{'prevent': True}" not in line:
                issues.append(Issue(
                    level="ERROR",
                    line=lineno,
                    code="E016",
                    # Plain string (not an f-string): braces must NOT be doubled,
                    # otherwise the report shows literal "{{...}}".
                    message="`data_on_submit` with `post()` without `{\"prevent\": True}` — form reloads page",
                    original=line.strip(),
                    fix='Add prevent modifier: data_on_submit=(post("/api/save"), {"prevent": True})'
                ))

    # W021: switch() used for CSS classes (should use collect())
    for lineno in analyzer._switch_usage:
        line = analyzer._get_line(lineno)
        # Only flag switch() when it feeds a class attribute.
        if "data_attr_class" in line or "data_class_" in line:
            issues.append(Issue(
                level="WARNING",
                line=lineno,
                code="W021",
                message="`switch()` used for CSS classes — use `collect()` to combine multiple classes",
                original=line.strip(),
                fix="Use collect() for CSS classes: data_attr_class=collect([(cond1, 'class1'), (cond2, 'class2')])"
            ))

    # W022: collect() used for exclusive logic (should use switch() or if_())
    for lineno in analyzer._collect_usage:
        line = analyzer._get_line(lineno)
        # Only flag collect() in non-CSS contexts (data_text, data_html, data_value).
        if "data_text" in line or "data_html" in line or "data_value" in line:
            issues.append(Issue(
                level="WARNING",
                line=lineno,
                code="W022",
                message="`collect()` used for exclusive logic — use `switch()` or `if_()` for single result",
                original=line.strip(),
                fix="Use switch() or if_() for exclusive logic: data_text=status.if_('Active', 'Inactive')"
            ))

    # W026: f() helper with < 3 signals (prefer + operator)
    for lineno, signal_count in analyzer._f_helper_usage:
        if signal_count < 3:
            line = analyzer._get_line(lineno)
            issues.append(Issue(
                level="WARNING",
                line=lineno,
                code="W026",
                message=f"`f()` helper with {signal_count} signal(s) — prefer `+` operator for 1-2 signals",
                original=line.strip(),
                fix='Use + operator: "Label: " + signal (saves tokens, simpler code)'
            ))

    # W023: .then() without conditional signal
    for lineno in analyzer._then_calls:
        line = analyzer._get_line(lineno)
        # Heuristic: a lowercase identifier directly before `.then(` is
        # treated as the signal the call is made on.
        has_conditional = bool(re.search(r'[a-z_][a-z0-9_]*\.then\(', line))
        if not has_conditional:
            issues.append(Issue(
                level="WARNING",
                line=lineno,
                code="W023",
                message="`.then()` without conditional signal — verify a boolean signal is used",
                original=line.strip(),
                fix="Use boolean signal: is_valid.then(post('/api/save'))"
            ))

    # W003: 3+ signals with & operator (prefer all())
    for lineno in analyzer._and_chains:
        line = analyzer._get_line(lineno)
        # Two " & " separators on one line mean at least three operands.
        if line.count(" & ") >= 2:
            issues.append(Issue(
                level="WARNING",
                line=lineno,
                code="W003",
                message="3+ signals with `&` operator — prefer `all(a, b, c)` for readability",
                original=line.strip(),
                fix="Use all(): all(sig1, sig2, sig3) instead of sig1 & sig2 & sig3"
            ))

    # W024: data_effect without .set() assignment
    for lineno in analyzer._data_effect_usage:
        # data_effect may span several lines, so inspect a window of up to
        # five lines starting at the recorded line.
        window = range(lineno, min(lineno + 5, len(analyzer.lines) + 1))
        context = "\n".join(analyzer._get_line(i) for i in window)
        # .set() (assignment) and .then() (conditional execution) are both
        # accepted; warn only when neither appears in the window.
        if ".set(" in context or ".then(" in context:
            continue
        # Report on the first line in the window that mentions data_effect,
        # falling back to the recorded line.
        data_effect_line = next(
            (i for i in window if "data_effect" in analyzer._get_line(i)),
            lineno,
        )
        issues.append(Issue(
            level="WARNING",
            line=data_effect_line,
            code="W024",
            message="`data_effect` without `.set()` — use `signal.set(expression)` for side effects",
            original=analyzer._get_line(data_effect_line).strip(),
            fix="Use .set(): data_effect=total.set(price * quantity)"
        ))

    # W025: Component function without **kwargs
    for func_name, func_lineno, has_kwargs in analyzer._component_functions:
        if not has_kwargs:
            issues.append(Issue(
                level="WARNING",
                line=func_lineno,
                code="W025",
                message=f"Component `{func_name}` without `**kwargs` — limits pass-through attributes",
                original=f"def {func_name}(...):",
                fix=f"def {func_name}(..., **kwargs): # then pass **kwargs to root element"
            ))

    # W027: File > 400 lines (suggest split)
    if len(analyzer.lines) > 400:
        issues.append(Issue(
            level="WARNING",
            line=1,
            code="W027",
            message=f"File has {len(analyzer.lines)} lines — consider splitting into smaller modules (max 400 lines)",
            original=f"File: {analyzer.lines[0] if analyzer.lines else ''}",
            fix="Split into multiple files: components.py, routes.py, handlers.py, etc."
        ))

    # W028: Deep nesting (>3 levels) in components
    for lineno, depth in analyzer._deep_nesting_locations:
        issues.append(Issue(
            level="WARNING",
            line=lineno,
            code="W028",
            message=f"Deep nesting ({depth} levels) — extract to sub-component for better LoB",
            original=analyzer._get_line(lineno),
            fix="Extract nested elements to a separate component function"
        ))

    # W029: Signal used only frontend without _ prefix
    # Common names that may be used indirectly are never reported.
    common_names = {"index", "id", "type", "name", "value", "cls", "todo", "item", "data", "content", "text", "title", "message"}
    for sig_name, lineno in analyzer._all_signal_definitions.items():
        if sig_name in analyzer._backend_signals or sig_name.startswith("_"):
            continue
        if sig_name in common_names:
            continue
        # Computed signals (defined with getter=) are skipped.
        if "getter=" in analyzer._get_line(lineno):
            continue
        issues.append(Issue(
            level="WARNING",
            line=lineno,
            code="W029",
            message=f"Signal `{sig_name}` not used in backend — consider `_` prefix for frontend-only signals",
            original=analyzer._get_line(lineno),
            fix=f"Rename to _{sig_name} to indicate frontend-only usage"
        ))

    # W030: js() that could be StarHTML (LoB violation)
    # (substring looked for in the js() code, suggested StarHTML replacement)
    lob_violations = (
        ("showModal()", "data_show with element"),
        ("close()", "data_show to hide "),
        (".classList.add", "data_class_* or data_attr_class"),
        (".classList.remove", "data_class_* or data_attr_class"),
        (".style.display", "data_show or data_style_display"),
        (".style.opacity", "data_style_opacity"),
        (".value =", "data_bind for two-way binding"),
        (".textContent", "data_text"),
        (".innerHTML", "data_html"),
        ("alert(", "custom modal with data_show"),
        ("confirm(", "custom confirmation modal"),
    )
    for lineno, js_code in analyzer._js_calls:
        for pattern, suggestion in lob_violations:
            if pattern in js_code:
                issues.append(Issue(
                    level="WARNING",
                    line=lineno,
                    code="W030",
                    message=f"js() using `{pattern}` — StarHTML can handle this with {suggestion} (LoB)",
                    original=js_code.strip(),
                    fix=f"Use StarHTML attribute: {suggestion}"
                ))
                # Report at most one W030 per js() call.
                break
def format_report(issues: list[Issue], analyzer: StarHTMLAnalyzer, filename: str, summary_only: bool = False) -> str:
    """Render the findings and a compact summary as a single text report.

    Args:
        issues: Findings to report; split by ``level`` into errors/warnings.
        analyzer: Source of the ``signals``, ``events`` and ``reactive_attrs``
            lists shown in the summary section.
        filename: Label printed in the report header.
        summary_only: When true, the per-issue ERRORS/WARNINGS listings are
            omitted and only the summary section is emitted.

    Returns:
        The report as one newline-joined string.
    """
    errors = [i for i in issues if i.level == "ERROR"]
    warnings = [i for i in issues if i.level == "WARNING"]
    lines = [f"── starhtml-check: {filename} ──"]

    if not summary_only:
        if errors:
            lines.append(f"\nERRORS ({len(errors)}):")
            lines.extend(str(issue) for issue in errors)
        if warnings:
            lines.append(f"\nWARNINGS ({len(warnings)}):")
            lines.extend(str(issue) for issue in warnings)

    # Summary
    lines.append("\nSUMMARY:")

    signals_str = ", ".join(analyzer.signals[:10])
    if len(analyzer.signals) > 10:
        signals_str += f" ... (+{len(analyzer.signals) - 10})"
    lines.append(f" SIGNALS : {signals_str if analyzer.signals else '(none)'}")

    events_str = ", ".join(analyzer.events[:5])
    if len(analyzer.events) > 5:
        events_str += f" ... (+{len(analyzer.events) - 5})"
    lines.append(f" EVENTS : {events_str if analyzer.events else '(none)'}")

    # dict.fromkeys() removes duplicates while keeping first-seen order, so
    # the same input always yields the same report (set() ordering varies
    # between runs because of string hash randomization).
    unique_reactive = list(dict.fromkeys(analyzer.reactive_attrs))
    reactive_str = ", ".join(unique_reactive[:10])
    lines.append(f" REACTIVE : {reactive_str if analyzer.reactive_attrs else '(none)'}")

    error_word = "error" if len(errors) == 1 else "errors"
    warning_word = "warning" if len(warnings) == 1 else "warnings"
    lines.append(f" ISSUES : {len(errors)} {error_word}, {len(warnings)} {warning_word}")

    if not errors and not warnings:
        lines.append("\n ✓ No issues found")
    elif summary_only and errors:
        # Only prompt for fixes when there is at least one error; a
        # warnings-only run must not print "Fix 0 errors".
        lines.append(f"\n ✗ Fix {len(errors)} {error_word} before proceeding")

    return "\n".join(lines)
def analyze(source: str, filename: str = "", summary_only: bool = False) -> str:
    """Run the full analysis on ``source`` and return the formatted report.

    The source is parsed with ``ast``; the analyzer visitor, the regex checks
    and the post checks then contribute issues, which are de-duplicated,
    sorted by line and rendered with ``format_report``.

    Args:
        source: Python/StarHTML source code to analyze.
        filename: Label used in the report header.
        summary_only: Forwarded to ``format_report``.

    Returns:
        The report text. If ``source`` does not parse, a short syntax-error
        report (header, line number, offending line, caret, message) is
        returned instead.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError as e:
        raw_text = e.text or ""
        # e.offset is 1-based and measured on the raw line, but the line is
        # shown stripped, so subtract both the 1 and the removed indentation
        # to keep the caret under the offending character.
        removed_indent = len(raw_text) - len(raw_text.lstrip())
        caret_pad = max((e.offset or 1) - 1 - removed_indent, 0)
        return (
            f"── starhtml-check: {filename} ──\n\n"
            f"SYNTAX ERROR at line {e.lineno}:\n"
            f" {raw_text.strip()}\n"
            f" {' ' * caret_pad}^\n"
            f" {e.msg}"
        )

    analyzer = StarHTMLAnalyzer(source)
    analyzer.visit(tree)
    issues = analyzer.issues
    lines = source.splitlines()
    check_regex(source, issues, lines)
    check_post(analyzer, issues)

    # Deduplicate by (line, code, message[:40]), keeping the first occurrence.
    seen = set()
    unique_issues = []
    for issue in issues:
        key = (issue.line, issue.code, issue.message[:40])
        if key not in seen:
            seen.add(key)
            unique_issues.append(issue)

    # Sort by line number; issues with line 0 come first.
    unique_issues.sort(key=lambda i: (i.line != 0, i.line))
    return format_report(unique_issues, analyzer, filename, summary_only)
def _read_source_file(path: str) -> str:
    """Return the text of ``path`` decoded as UTF-8, or exit(1) with an ERROR line."""
    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"ERROR: Failed to read {path}: {e}")
        sys.exit(1)


def main():
    """Command-line entry point.

    Handles, in order of precedence: ``--update``, ``--summary FILE``,
    ``--code SNIPPET`` and a positional ``file``. Each handled mode prints
    its result and exits with status 0. With no usable argument the help
    text is printed and the process exits with status 1. An unreadable
    input file prints a one-line ``ERROR:`` message and exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="starhtml-check — Static analyzer for StarHTML code"
    )
    parser.add_argument("file", nargs="?", help="File to analyze")
    parser.add_argument("--code", help="Analyze inline code snippet")
    parser.add_argument("--summary", metavar="FILE", help="Compact output (fewer tokens)")
    parser.add_argument("--update", action="store_true", help="Check for updates and update to latest version from GitHub")
    args = parser.parse_args()

    if args.update:
        update_checker()
        sys.exit(0)

    if args.summary:
        source = _read_source_file(args.summary)
        print(analyze(source, args.summary, summary_only=True))
        sys.exit(0)

    if args.code:
        print(analyze(args.code, ""))
        sys.exit(0)

    if args.file:
        source = _read_source_file(args.file)
        print(analyze(source, args.file))
        sys.exit(0)

    parser.print_help()
    sys.exit(1)
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()