"""Extract functions, classes and their leading doc comments from C++ source.

The extraction is regex based (not a real C++ parser); see the notes on
``extract_cpp_objects`` for the known limitations.
"""
import re

try:
    from .preProcessing import parse_docstring, generate_html
except ImportError:
    # Not imported as part of a package (e.g. run directly as a script):
    # fall back to the sibling module on sys.path.
    from preProcessing import parse_docstring, generate_html


# A /* ... */ block comment, or a run of consecutive // line comments.
_COMMENT_PATTERN = r"(?:/\*[\s\S]*?\*/|//[^\n]*(?:\n[ \t]*//[^\n]*)*)"
# "class Name {" -- the brace must directly follow the class name.
_CLASS_RE = re.compile(r"\bclass\s+(\w+)\s*\{")
# "<one or more words> name(params) [const] {"
_FUNC_RE = re.compile(
    r"(?:[a-zA-Z_]\w*\s+)+([a-zA-Z_]\w*)\s*\([^)]*\)\s*(?:const)?\s*\{"
)
# A comment that is the first thing inside a "{ ... }" body.
_LEADING_COMMENT_RE = re.compile(r"\{\s*(" + _COMMENT_PATTERN + ")")
# Reserved words that _FUNC_RE would otherwise report as function names,
# e.g. "else if (x) {" looks like a function called "if".
_CONTROL_KEYWORDS = frozenset({"if", "for", "while", "switch", "catch"})


def clean_comment(text):
    """Remove C++ comment markers (/*, */, //) and leading '*' from block lines.

    Args:
        text: Raw comment text including its markers; may be empty or None.

    Returns:
        The comment text without markers, stripped of surrounding
        whitespace. Returns "" for empty/None input.
    """
    if not text:
        return ""
    # Remove /* and */
    text = re.sub(r"^/\*+", "", text.strip(), flags=re.MULTILINE)
    text = re.sub(r"\*/$", "", text.strip(), flags=re.MULTILINE)
    # Remove leading // and optional space
    text = re.sub(r"^\s*//\s?", "", text, flags=re.MULTILINE)
    # Remove leading * (common in block comments)
    text = re.sub(r"^\s*\*\s?", "", text, flags=re.MULTILINE)
    return text.strip()


def _find_block_end(text, open_pos):
    """Return the index just past the '}' closing the '{' at ``open_pos``.

    If the braces never balance, ``len(text)`` is returned so the block
    extends to the end of the text.

    NOTE: braces are counted character by character, so braces inside
    string literals or comments are counted as well.
    """
    depth = 0
    for idx in range(open_pos, len(text)):
        ch = text[idx]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return idx + 1
    return len(text)


def _iter_function_matches(text):
    """Yield ``(name, start, open_pos)`` for each function-like match in ``text``.

    ``start`` is where the match begins and ``open_pos`` is the index of the
    opening '{' of the body. Matches whose "name" is a control-flow keyword
    are skipped because they are statements, not definitions.
    """
    for match in _FUNC_RE.finditer(text):
        name = match.group(1)
        if name in _CONTROL_KEYWORDS:
            continue
        # The pattern ends with '{', so the brace is the last matched char.
        yield name, match.start(), match.end() - 1


def _leading_doc(text, open_pos):
    """Parse the comment opening the body that starts at ``text[open_pos]``.

    Returns the result of ``parse_docstring`` on the cleaned comment, or an
    empty dict when the body does not start with a (non-empty) comment.
    """
    body = text[open_pos:_find_block_end(text, open_pos)]
    found = _LEADING_COMMENT_RE.match(body)
    if not found:
        return {}
    cleaned = clean_comment(found.group(1))
    return parse_docstring(cleaned) if cleaned else {}


def extract_cpp_objects(file_path):
    """Extract C++ functions and classes (with methods) and their cleaned comments.

    The documentation of a function/method is taken from the comment that is
    the first thing inside its body.

    Args:
        file_path: Path of the C++ source file (read as UTF-8).

    Returns:
        A list of dicts, classes first and then standalone functions:
          * class:    {'address', 'type': 'class', 'name', 'doc': {}, 'methods'}
            where each method is {'address', 'name', 'doc'}
          * function: {'address', 'type': 'function', 'name', 'doc'}

    Notes:
        * Only ``class Name {`` is recognised as a class: the opening brace
          must directly follow the name.
        * Brace matching does not skip string literals or comments.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        code = f.read()

    objects = []
    class_spans = []  # (start, end) of every class body within ``code``

    # === Find classes ===
    for class_match in _CLASS_RE.finditer(code):
        open_pos = class_match.end() - 1
        end_pos = _find_block_end(code, open_pos)
        class_spans.append((open_pos, end_pos))
        class_body = code[open_pos:end_pos]

        methods = [
            {
                'address': file_path,
                'name': method_name,
                'doc': _leading_doc(class_body, method_open),
            }
            for method_name, _start, method_open in _iter_function_matches(class_body)
        ]
        objects.append({
            'address': file_path,
            'type': 'class',
            'name': class_match.group(1),
            'doc': {},
            'methods': methods,
        })

    # === Find standalone functions (outside classes) ===
    for func_name, start, open_pos in _iter_function_matches(code):
        # Decide by position, not by name: a free function may legitimately
        # share its name with a method of some class.
        if any(lo <= start < hi for lo, hi in class_spans):
            continue
        objects.append({
            'address': file_path,
            'type': 'function',
            'name': func_name,
            'doc': _leading_doc(code, open_pos),
        })

    return objects


if __name__ == "__main__":
    items = extract_cpp_objects("src/CPP/TEST/Test classs.cpp")
    for obj in items:
        print(generate_html(obj))