### implemented based on https://github.com/getomni-ai/benchmark/blob/main/src/evaluation/json.ts from deepdiff import DeepDiff from typing import Dict, Any, TypedDict, Optional class DiffStats(TypedDict): additions: int deletions: int modifications: int total: int class AccuracyResult(TypedDict): score: float full_json_diff: Dict[str, Any] json_diff: Dict[str, Any] json_diff_stats: Optional[DiffStats] total_fields: int def count_total_fields(obj: Any) -> int: """ Count the total number of primitive fields in a JSON object. """ count = 0 def traverse(current: Any) -> None: nonlocal count if current is None or not isinstance(current, (dict, list)): return if isinstance(current, list): # Traverse into array elements if they're objects for item in current: if isinstance(item, (dict, list)): traverse(item) else: count += 1 else: for key in current: # Skip diff metadata keys if '__' in key: continue # Only count primitive value fields if (current[key] is None or isinstance(current[key], (str, int, float, bool))): count += 1 # Recurse into nested objects and arrays elif isinstance(current[key], (dict, list)): traverse(current[key]) traverse(obj) return count def count_changes_deepdiff(diff_result: Dict[str, Any]) -> DiffStats: """ Count the number of additions, deletions, and modifications from a DeepDiff result. """ changes = { 'additions': 0, 'deletions': 0, 'modifications': 0, 'total': 0 } # Count additions (dictionary_item_added and iterable_item_added) if 'dictionary_item_added' in diff_result: # Each addition might be a primitive or a complex structure for _, value in diff_result['dictionary_item_added'].items(): if value is None or not isinstance(value, (dict, list)): changes['additions'] += 1 else: changes['additions'] += count_total_fields(value) if 'iterable_item_added' in diff_result: for _, value in diff_result['iterable_item_added'].items(): if value is None or not isinstance(value, (dict, list)): changes['additions'] += 1 else: changes['additions'] += count_total_fields(value) # Count deletions (dictionary_item_removed and iterable_item_removed) if 'dictionary_item_removed' in diff_result: for _, value in diff_result['dictionary_item_removed'].items(): if value is None or not isinstance(value, (dict, list)): changes['deletions'] += 1 else: changes['deletions'] += count_total_fields(value) if 'iterable_item_removed' in diff_result: for _, value in diff_result['iterable_item_removed'].items(): if value is None or not isinstance(value, (dict, list)): changes['deletions'] += 1 else: changes['deletions'] += count_total_fields(value) # Count modifications (values_changed and type_changes) if 'values_changed' in diff_result: changes['modifications'] += len(diff_result['values_changed']) if 'type_changes' in diff_result: changes['modifications'] += len(diff_result['type_changes']) changes['total'] = changes['additions'] + changes['deletions'] + changes['modifications'] return changes def calculate_json_accuracy(actual: Dict[str, Any], predicted: Dict[str, Any]) -> AccuracyResult: """ Calculates accuracy for JSON structure and primitive values. The accuracy is calculated as: 1 - (number of differences / total fields in actual) Differences include: - Additions: Fields present in predicted but not in actual - Deletions: Fields present in actual but not in predicted - Modifications: Fields present in both but with different values A score of 1.0 means the JSONs are identical A score of 0.0 means completely different """ # Count the total fields total_fields = count_total_fields(actual) # Get the DeepDiff result diff_result = DeepDiff(actual, predicted, verbose_level=2) # If there's no diff, the JSONs are identical if not diff_result: return { 'score': 1.0, 'json_diff': {}, 'full_json_diff': {}, 'json_diff_stats': { 'additions': 0, 'deletions': 0, 'modifications': 0, 'total': 0 }, 'total_fields': total_fields } # Count the changes changes = count_changes_deepdiff(diff_result) # Calculate the score score = max(0, 1 - (changes['total'] / total_fields if total_fields > 0 else 0)) return { 'score': round(score, 4), 'json_diff': diff_result, 'full_json_diff': diff_result, # Same as json_diff for DeepDiff 'json_diff_stats': changes, 'total_fields': total_fields } # Example usage if __name__ == "__main__": # Install deepdiff if needed: pip install deepdiff actual = { "name": "John", "age": 30, "address": { "street": "123 Main St", "city": "New York" }, "hobbies": ["reading", "swimming"] } predicted = { "name": "John", "age": 31, # Changed "address": { "street": "123 Main St", "city": "Boston" # Changed }, "hobbies": ["reading", "cycling"], # Changed swimming to cycling "occupation": "engineer" # Added } result = calculate_json_accuracy(actual, predicted) print(f"Accuracy score: {result['score']}") print(f"Total fields: {result['total_fields']}") print(f"Diff stats: {result['json_diff_stats']}")