""" SEO Quality Rater Rates content quality against SEO best practices and guidelines. Provides scoring (0-100) and specific recommendations for improvement. """ import re from typing import Dict, List, Optional, Any, Tuple class SEOQualityRater: """Rates content against SEO best practices""" def __init__(self, guidelines: Optional[Dict[str, Any]] = None): """ Initialize SEO Quality Rater Args: guidelines: Custom SEO guidelines (defaults to standard best practices) """ self.guidelines = guidelines or self._default_guidelines() def _default_guidelines(self) -> Dict[str, Any]: """Default SEO guidelines based on industry standards""" return { 'min_word_count': 2000, 'optimal_word_count': 2500, 'max_word_count': 3000, 'primary_keyword_density_min': 1.0, 'primary_keyword_density_max': 2.0, 'secondary_keyword_density': 0.5, 'min_internal_links': 3, 'optimal_internal_links': 5, 'min_external_links': 2, 'optimal_external_links': 3, 'meta_title_length_min': 50, 'meta_title_length_max': 60, 'meta_description_length_min': 150, 'meta_description_length_max': 160, 'min_h2_sections': 4, 'optimal_h2_sections': 6, 'h2_with_keyword_ratio': 0.33, # At least 1/3 of H2s should have keyword 'max_sentence_length': 25, 'target_reading_level_min': 8, 'target_reading_level_max': 10, 'paragraph_sentence_min': 2, 'paragraph_sentence_max': 4, } def rate( self, content: str, meta_title: Optional[str] = None, meta_description: Optional[str] = None, primary_keyword: Optional[str] = None, secondary_keywords: Optional[List[str]] = None, keyword_density: Optional[float] = None, internal_link_count: Optional[int] = None, external_link_count: Optional[int] = None ) -> Dict[str, Any]: """ Rate content against SEO best practices Args: content: Article content meta_title: Meta title tag meta_description: Meta description tag primary_keyword: Target primary keyword secondary_keywords: Target secondary keywords keyword_density: Pre-calculated keyword density internal_link_count: Number of internal links external_link_count: Number of external links Returns: Dict with overall score, category scores, and recommendations """ # Extract structure structure = self._analyze_structure(content, primary_keyword) # Score each category content_score = self._score_content(content, structure) keyword_score = self._score_keyword_optimization( content, structure, primary_keyword, secondary_keywords, keyword_density ) meta_score = self._score_meta_elements( meta_title, meta_description, primary_keyword ) structure_score = self._score_structure(structure) link_score = self._score_links( content, internal_link_count, external_link_count ) readability_score = self._score_readability(content, structure) # Calculate overall score (weighted average) weights = { 'content': 0.20, 'keywords': 0.25, 'meta': 0.15, 'structure': 0.15, 'links': 0.15, 'readability': 0.10 } overall_score = ( content_score['score'] * weights['content'] + keyword_score['score'] * weights['keywords'] + meta_score['score'] * weights['meta'] + structure_score['score'] * weights['structure'] + link_score['score'] * weights['links'] + readability_score['score'] * weights['readability'] ) # Compile all issues critical_issues = [] warnings = [] suggestions = [] for category in [content_score, keyword_score, meta_score, structure_score, link_score, readability_score]: critical_issues.extend(category.get('critical', [])) warnings.extend(category.get('warnings', [])) suggestions.extend(category.get('suggestions', [])) return { 'overall_score': round(overall_score, 1), 'grade': self._get_grade(overall_score), 'category_scores': { 'content': content_score['score'], 'keyword_optimization': keyword_score['score'], 'meta_elements': meta_score['score'], 'structure': structure_score['score'], 'links': link_score['score'], 'readability': readability_score['score'] }, 'critical_issues': critical_issues, 'warnings': warnings, 'suggestions': suggestions, 'publishing_ready': overall_score >= 80 and len(critical_issues) == 0, 'details': { 'word_count': structure['word_count'], 'h2_count': structure['h2_count'], 'has_h1': structure['has_h1'], 'keyword_in_h1': structure.get('keyword_in_h1', False), 'keyword_in_first_100': structure.get('keyword_in_first_100', False) } } def _analyze_structure(self, content: str, primary_keyword: Optional[str]) -> Dict[str, Any]: """Analyze content structure""" lines = content.split('\n') # Extract headings h1_count = 0 h2_count = 0 h3_count = 0 h1_text = "" h2_texts = [] h3_texts = [] for line in lines: h1_match = re.match(r'^#\s+(.+)$', line) h2_match = re.match(r'^##\s+(.+)$', line) h3_match = re.match(r'^###\s+(.+)$', line) if h1_match: h1_count += 1 if not h1_text: # First H1 h1_text = h1_match.group(1) elif h2_match: h2_count += 1 h2_texts.append(h2_match.group(1)) elif h3_match: h3_count += 1 h3_texts.append(h3_match.group(1)) # Word count word_count = len(content.split()) # Paragraph analysis paragraphs = [p for p in content.split('\n\n') if p.strip() and not p.strip().startswith('#')] avg_paragraph_length = sum(len(p.split()) for p in paragraphs) / len(paragraphs) if paragraphs else 0 # Keyword checks keyword_in_h1 = False keyword_in_first_100 = False h2_with_keyword = 0 if primary_keyword: keyword_lower = primary_keyword.lower() keyword_in_h1 = keyword_lower in h1_text.lower() first_100_words = ' '.join(content.split()[:100]).lower() keyword_in_first_100 = keyword_lower in first_100_words for h2 in h2_texts: if keyword_lower in h2.lower(): h2_with_keyword += 1 return { 'word_count': word_count, 'has_h1': h1_count > 0, 'h1_count': h1_count, 'h1_text': h1_text, 'h2_count': h2_count, 'h2_texts': h2_texts, 'h3_count': h3_count, 'paragraph_count': len(paragraphs), 'avg_paragraph_length': avg_paragraph_length, 'keyword_in_h1': keyword_in_h1, 'keyword_in_first_100': keyword_in_first_100, 'h2_with_keyword': h2_with_keyword } def _score_content(self, content: str, structure: Dict) -> Dict[str, Any]: """Score content length and quality""" score = 100 critical = [] warnings = [] suggestions = [] word_count = structure['word_count'] min_words = self.guidelines['min_word_count'] optimal_words = self.guidelines['optimal_word_count'] max_words = self.guidelines['max_word_count'] # Word count scoring if word_count < min_words: score -= 30 critical.append(f"Content is too short ({word_count} words). Minimum is {min_words} words.") elif word_count < optimal_words: score -= 10 warnings.append(f"Content could be longer ({word_count} words). Optimal is {optimal_words}+ words.") elif word_count > max_words: score -= 5 suggestions.append(f"Content is quite long ({word_count} words). Consider breaking into multiple articles if over {max_words} words.") # Paragraph length avg_para = structure['avg_paragraph_length'] if avg_para > 150: score -= 10 warnings.append(f"Paragraphs are too long (avg {avg_para:.0f} words). Break into 2-4 sentence paragraphs.") elif avg_para < 30: score -= 5 suggestions.append(f"Paragraphs are very short (avg {avg_para:.0f} words). Add more detail where appropriate.") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _score_keyword_optimization( self, content: str, structure: Dict, primary_keyword: Optional[str], secondary_keywords: Optional[List[str]], keyword_density: Optional[float] ) -> Dict[str, Any]: """Score keyword optimization""" score = 100 critical = [] warnings = [] suggestions = [] if not primary_keyword: return { 'score': 50, 'critical': ['No primary keyword specified'], 'warnings': [], 'suggestions': [] } # Keyword in H1 if not structure['keyword_in_h1']: score -= 20 critical.append(f"Primary keyword '{primary_keyword}' missing from H1 heading") # Keyword in first 100 words if not structure['keyword_in_first_100']: score -= 15 critical.append(f"Primary keyword '{primary_keyword}' missing from first 100 words") # Keyword in H2 headings h2_count = structure['h2_count'] h2_with_kw = structure['h2_with_keyword'] if h2_count > 0: ratio = h2_with_kw / h2_count target_ratio = self.guidelines['h2_with_keyword_ratio'] if ratio < target_ratio: score -= 10 warnings.append( f"Keyword appears in only {h2_with_kw}/{h2_count} H2 headings. " f"Target is at least {int(target_ratio * 100)}% (2-3 H2s)" ) # Keyword density if keyword_density is not None: min_density = self.guidelines['primary_keyword_density_min'] max_density = self.guidelines['primary_keyword_density_max'] if keyword_density < min_density: score -= 15 warnings.append( f"Keyword density is too low ({keyword_density}%). " f"Target is {min_density}-{max_density}%" ) elif keyword_density > max_density * 1.5: score -= 20 critical.append( f"Keyword density is too high ({keyword_density}%). " f"Risk of keyword stuffing. Target is {min_density}-{max_density}%" ) elif keyword_density > max_density: score -= 10 warnings.append( f"Keyword density is slightly high ({keyword_density}%). " f"Target is {min_density}-{max_density}%" ) # Secondary keywords if secondary_keywords: content_lower = content.lower() missing_keywords = [kw for kw in secondary_keywords if kw.lower() not in content_lower] if missing_keywords: score -= 5 suggestions.append(f"Secondary keywords not found: {', '.join(missing_keywords)}") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _score_meta_elements( self, meta_title: Optional[str], meta_description: Optional[str], primary_keyword: Optional[str] ) -> Dict[str, Any]: """Score meta title and description""" score = 100 critical = [] warnings = [] suggestions = [] # Meta title if not meta_title: score -= 40 critical.append("Meta title is missing") else: title_len = len(meta_title) min_len = self.guidelines['meta_title_length_min'] max_len = self.guidelines['meta_title_length_max'] if title_len < min_len: score -= 15 warnings.append(f"Meta title too short ({title_len} chars). Target is {min_len}-{max_len} chars.") elif title_len > max_len + 10: score -= 10 warnings.append(f"Meta title too long ({title_len} chars). Target is {min_len}-{max_len} chars.") if primary_keyword and primary_keyword.lower() not in meta_title.lower(): score -= 15 warnings.append(f"Primary keyword '{primary_keyword}' not in meta title") # Meta description if not meta_description: score -= 40 critical.append("Meta description is missing") else: desc_len = len(meta_description) min_len = self.guidelines['meta_description_length_min'] max_len = self.guidelines['meta_description_length_max'] if desc_len < min_len: score -= 15 warnings.append(f"Meta description too short ({desc_len} chars). Target is {min_len}-{max_len} chars.") elif desc_len > max_len + 10: score -= 10 warnings.append(f"Meta description too long ({desc_len} chars). Target is {min_len}-{max_len} chars.") if primary_keyword and primary_keyword.lower() not in meta_description.lower(): score -= 10 suggestions.append(f"Primary keyword '{primary_keyword}' not in meta description") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _score_structure(self, structure: Dict) -> Dict[str, Any]: """Score content structure""" score = 100 critical = [] warnings = [] suggestions = [] # H1 check if not structure['has_h1']: score -= 30 critical.append("Missing H1 heading") elif structure['h1_count'] > 1: score -= 20 critical.append(f"Multiple H1 headings found ({structure['h1_count']}). Should only have one.") # H2 count h2_count = structure['h2_count'] min_h2 = self.guidelines['min_h2_sections'] optimal_h2 = self.guidelines['optimal_h2_sections'] if h2_count < min_h2: score -= 15 warnings.append(f"Too few H2 sections ({h2_count}). Add more main sections (target: {optimal_h2}).") elif h2_count < optimal_h2: score -= 5 suggestions.append(f"Could use more H2 sections ({h2_count}). Optimal is {optimal_h2} sections.") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _score_links( self, content: str, internal_count: Optional[int], external_count: Optional[int] ) -> Dict[str, Any]: """Score internal and external linking""" score = 100 critical = [] warnings = [] suggestions = [] # Count links if not provided if internal_count is None: internal_count = len(re.findall(r'\[([^\]]+)\]\((?!http)', content)) if external_count is None: external_count = len(re.findall(r'\[([^\]]+)\]\(https?://', content)) # Internal links min_internal = self.guidelines['min_internal_links'] optimal_internal = self.guidelines['optimal_internal_links'] if internal_count < min_internal: score -= 20 warnings.append( f"Too few internal links ({internal_count}). " f"Add {min_internal - internal_count} more (target: {optimal_internal})." ) elif internal_count < optimal_internal: score -= 5 suggestions.append(f"Could add more internal links ({internal_count}). Optimal is {optimal_internal}.") # External links min_external = self.guidelines['min_external_links'] optimal_external = self.guidelines['optimal_external_links'] if external_count < min_external: score -= 15 warnings.append( f"Too few external links ({external_count}). " f"Add authoritative sources (target: {optimal_external})." ) elif external_count < optimal_external: score -= 5 suggestions.append(f"Could add more external links ({external_count}). Optimal is {optimal_external}.") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _score_readability(self, content: str, structure: Dict) -> Dict[str, Any]: """Score readability factors""" score = 100 critical = [] warnings = [] suggestions = [] # Sentence length analysis sentences = re.split(r'[.!?]+', content) sentences = [s.strip() for s in sentences if s.strip()] sentence_lengths = [len(s.split()) for s in sentences] avg_sentence_length = sum(sentence_lengths) / len(sentence_lengths) if sentence_lengths else 0 max_sentence = self.guidelines['max_sentence_length'] if avg_sentence_length > max_sentence: score -= 10 warnings.append( f"Average sentence length is {avg_sentence_length:.1f} words. " f"Target is under {max_sentence} words for better readability." ) # Very long sentences long_sentences = [s for s in sentence_lengths if s > max_sentence * 1.5] if len(long_sentences) > len(sentences) * 0.2: # More than 20% are too long score -= 10 warnings.append( f"{len(long_sentences)} sentences are very long (>{max_sentence * 1.5} words). " "Break them into shorter sentences." ) # Lists and formatting bullet_lists = len(re.findall(r'^\s*[-*+]\s', content, re.MULTILINE)) numbered_lists = len(re.findall(r'^\s*\d+\.\s', content, re.MULTILINE)) if bullet_lists + numbered_lists == 0: score -= 5 suggestions.append("No lists found. Use bullet points or numbered lists to improve scannability.") return { 'score': max(0, score), 'critical': critical, 'warnings': warnings, 'suggestions': suggestions } def _get_grade(self, score: float) -> str: """Convert score to letter grade""" if score >= 90: return "A (Excellent)" elif score >= 80: return "B (Good)" elif score >= 70: return "C (Average)" elif score >= 60: return "D (Needs Work)" else: return "F (Poor)" # Convenience function def rate_seo_quality( content: str, meta_title: Optional[str] = None, meta_description: Optional[str] = None, primary_keyword: Optional[str] = None, secondary_keywords: Optional[List[str]] = None, keyword_density: Optional[float] = None, internal_link_count: Optional[int] = None, external_link_count: Optional[int] = None, custom_guidelines: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ Rate SEO quality of content Args: content: Article content meta_title: Meta title meta_description: Meta description primary_keyword: Target keyword secondary_keywords: Secondary keywords keyword_density: Pre-calculated density internal_link_count: Number of internal links external_link_count: Number of external links custom_guidelines: Custom SEO guidelines Returns: SEO quality rating with score and recommendations """ rater = SEOQualityRater(custom_guidelines) return rater.rate( content, meta_title, meta_description, primary_keyword, secondary_keywords, keyword_density, internal_link_count, external_link_count ) # Example usage if __name__ == "__main__": sample_content = """ # How to Start a Podcast Starting a podcast is easier than you think. This complete guide shows you how to start a podcast from scratch. ## Choose Your Topic Pick a topic you're passionate about. Your podcast topic should resonate with your target audience. ## Get Equipment You'll need a microphone, headphones, and recording software. ## Record Your First Episode Start recording! Don't worry about perfection on your first try. ## Publish Your Podcast Upload to a podcast hosting platform and distribute to directories. Ready to start your podcast? Begin today with these simple steps. """ result = rate_seo_quality( content=sample_content, meta_title="How to Start a Podcast: Complete Guide for 2024", meta_description="Learn how to start a podcast from scratch with this step-by-step guide. Everything you need to know about podcast equipment, recording, and publishing.", primary_keyword="start a podcast", secondary_keywords=["podcast hosting", "recording software"], keyword_density=1.8, internal_link_count=4, external_link_count=2 ) print("=== SEO Quality Report ===") print(f"\nOverall Score: {result['overall_score']}/100") print(f"Grade: {result['grade']}") print(f"Publishing Ready: {result['publishing_ready']}") print(f"\nCategory Scores:") for category, score in result['category_scores'].items(): print(f" {category}: {score}/100") if result['critical_issues']: print(f"\nCritical Issues:") for issue in result['critical_issues']: print(f" ❌ {issue}") if result['warnings']: print(f"\nWarnings:") for warning in result['warnings']: print(f" ⚠️ {warning}") if result['suggestions']: print(f"\nSuggestions:") for suggestion in result['suggestions'][:3]: print(f" 💡 {suggestion}")