import re
try:
    from .preProcessing import parse_docstring, generate_html
except:
    from preProcessing import parse_docstring, generate_html


def clean_comment(text):
    """Strip C++ comment markers from a raw comment and return the bare text.

    Removes a ``/*`` (or ``/**``) opener at the start of a line, a ``*/``
    closer at the end of a line, a leading ``//`` on each line, and the
    decorative leading ``*`` found on block-comment continuation lines.

    Only horizontal whitespace next to a marker is consumed. A marker-only
    line (a lone ``*`` or ``//``) therefore becomes an empty line instead of
    being merged into its neighbour, so paragraph breaks inside the comment
    survive the cleaning.

    Args:
        text: Raw comment text including its markers; may be empty or None.

    Returns:
        The comment text without markers, stripped of surrounding
        whitespace. Returns "" for empty or None input.
    """
    if not text:
        return ""
    # Remove the /* (or /**) opener and the */ closer.
    text = re.sub(r"^/\*+", "", text.strip(), flags=re.MULTILINE)
    text = re.sub(r"\*/$", "", text.strip(), flags=re.MULTILINE)
    # Remove a leading // and one optional space. [ \t] is used instead of \s
    # because \s also matches "\n" and would delete blank comment lines.
    text = re.sub(r"^[ \t]*//[ \t]?", "", text, flags=re.MULTILINE)
    # Remove the leading * that decorates block-comment lines (same reason
    # for restricting the whitespace class to spaces and tabs).
    text = re.sub(r"^[ \t]*\*[ \t]?", "", text, flags=re.MULTILINE)
    return text.strip()

# A block comment or a single line comment.
_CPP_BODY_COMMENT_RE = re.compile(r"\{\s*((?:/\*[\s\S]*?\*/|//[^\n]*))")

# "class Name {", optionally with "final" and/or a base-class list before the
# opening brace ("class A final : public B {").
_CPP_CLASS_RE = re.compile(r"\bclass\s+(\w+)\s*(?:final\s*)?(?::[^;{]*)?\{")

# One or more leading words (return type, qualifiers), a name, a parameter
# list, an optional "const", then the opening brace of the body.
_CPP_FUNC_RE = re.compile(
    r"(?:[a-zA-Z_]\w*\s+)+([a-zA-Z_]\w*)\s*\([^)]*\)\s*(?:const)?\s*\{"
)

# Keywords the function regex can capture as a "name", e.g. the "if" in
# "else if (x) {" or the "constexpr" in "if constexpr (x) {".
_CPP_NON_FUNCTION_NAMES = frozenset({
    "if", "for", "while", "switch", "catch", "constexpr",
    "return", "throw", "new", "delete", "sizeof", "decltype", "alignof",
})


def _find_block_end(text, open_brace):
    """Return the index just past the brace closing the one at ``open_brace``.

    Braces are counted textually (braces inside string literals or comments
    are not recognised). If the block is never closed, ``len(text)`` is
    returned.
    """
    depth = 1
    pos = open_brace + 1
    size = len(text)
    while pos < size and depth > 0:
        char = text[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
        pos += 1
    return pos


def _parse_leading_comment(body):
    """Parse the comment that directly follows the opening brace of ``body``.

    Returns ``{}`` when the body does not start with a comment or the comment
    is empty after cleaning; otherwise returns the result of
    ``parse_docstring`` on the cleaned comment text.
    """
    found = _CPP_BODY_COMMENT_RE.match(body)
    if not found:
        return {}
    cleaned = clean_comment(found.group(1))
    return parse_docstring(cleaned) if cleaned else {}


def extract_cpp_objects(file_path):
    """Extract C++ classes (with methods) and standalone functions from a file.

    The file is scanned with regular expressions, not a real C++ parser. The
    documentation of a method or function is taken from the comment placed
    immediately after its opening brace.

    Args:
        file_path: Path of the C++ source file, read as UTF-8.

    Returns:
        A list of dicts, classes first and then standalone functions, each in
        source order:

        * class: ``{'address', 'type': 'class', 'name', 'doc': {},
          'methods': [{'address', 'name', 'doc'}, ...]}`` (the class-level
          ``'doc'`` is always an empty dict);
        * function: ``{'address', 'type': 'function', 'name', 'doc'}``.

        ``'address'`` is ``file_path`` unchanged.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()

    objects = []
    # (start, end) offsets in ``code`` of every class found, used to tell
    # methods apart from standalone functions by position.
    class_spans = []

    # === Find classes ===
    for class_match in _CPP_CLASS_RE.finditer(code):
        body_start = class_match.end() - 1  # index of the opening "{"
        body_end = _find_block_end(code, body_start)
        class_spans.append((class_match.start(), body_end))
        class_body = code[body_start:body_end]

        methods = []
        for method_match in _CPP_FUNC_RE.finditer(class_body):
            method_name = method_match.group(1)
            if method_name in _CPP_NON_FUNCTION_NAMES:
                # Control-flow statement, not a method definition.
                continue
            method_start = method_match.end() - 1
            method_end = _find_block_end(class_body, method_start)
            methods.append({
                'address': file_path,
                'name': method_name,
                'doc': _parse_leading_comment(class_body[method_start:method_end]),
            })

        objects.append({
            'address': file_path,
            'type': 'class',
            'name': class_match.group(1),
            'doc': {},
            'methods': methods,
        })

    # === Find standalone functions (outside classes) ===
    for func_match in _CPP_FUNC_RE.finditer(code):
        func_name = func_match.group(1)
        if func_name in _CPP_NON_FUNCTION_NAMES:
            continue

        body_start = func_match.end() - 1
        # Skip by position rather than by name: a free function may share its
        # name with a method of some class and must still be reported.
        if any(start <= body_start < end for start, end in class_spans):
            continue

        body_end = _find_block_end(code, body_start)
        objects.append({
            'address': file_path,
            'type': 'function',
            'name': func_name,
            'doc': _parse_leading_comment(code[body_start:body_end]),
        })

    return objects


if __name__ == "__main__":
    # Ad-hoc run: print generate_html's output for every object extracted
    # from the sample C++ file.
    for entry in extract_cpp_objects("src/CPP/TEST/Test classs.cpp"):
        print(generate_html(entry))