Image Validation Testing

Detect missing images, validate alt text, and catch broken image URLs

This guide demonstrates how to validate images across your website with LoadForge, checking for missing images, accessibility issues, and SEO problems. The use cases below are followed by a minimal sketch of the core pattern, then by three complete scripts of increasing depth.

Use Cases

  • Detecting broken or missing images (404 errors)
  • Validating alt text for accessibility compliance
  • Checking image loading performance and optimization
  • Finding oversized or unoptimized images
  • Validating responsive image implementations
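
A minimal sketch of that core pattern (fetch a page, extract <img> tags, and HEAD-check each internal src) is shown here; the class name and the '/' starting page are assumptions, and the full scripts below add crawling, size checks, alt text validation, and reporting:

from locust import HttpUser, task
import re

class MinimalImageCheck(HttpUser):
    """Sketch only: fetch one page, find <img> tags, HEAD-check internal sources."""

    @task
    def check_homepage_images(self):
        page = self.client.get('/')
        for img_tag in re.findall(r'<img[^>]*>', page.text, re.IGNORECASE):
            match = re.search(r'src=["\']([^"\']+)["\']', img_tag, re.IGNORECASE)
            if match and match.group(1).startswith('/'):
                # HEAD detects 404s without downloading the image body
                self.client.head(match.group(1), name="IMG check")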

Simple Image Validation

from locust import HttpUser, task, between
import re
import time
from urllib.parse import urlparse
from collections import deque

class SimpleImageValidator(HttpUser):
    wait_time = between(0.5, 1)
    
    # CONFIGURATION - Easily modify these values
    MAX_IMAGE_SIZE_MB = 1.0  # Images larger than this will be flagged
    MAX_ALT_TEXT_LENGTH = 125  # Alt text longer than this will be flagged
    
    def on_start(self):
        """Initialize simple image validation"""
        self.visited_pages = set()
        self.pages_to_check = deque(['/'])
        self.broken_images = []
        self.oversized_images = []
        self.missing_alt_text = []
        self.total_images_checked = 0
        self.base_domain = None
        
        # Initialize with homepage
        self._initialize_validator()
        
    def _initialize_validator(self):
        """Initialize the image validator with homepage"""
        try:
            homepage_response = self.client.get('/', name="INIT: Homepage Check")
            
            if homepage_response.status_code == 200:
                self.base_domain = urlparse(self.client.base_url).netloc
                print(f"Starting image validation for: {self.base_domain}")
                print(f"Max image size limit: {self.MAX_IMAGE_SIZE_MB}MB")
            else:
                print(f"Failed to access homepage: {homepage_response.status_code}")
                
        except Exception as e:
            print(f"Error initializing validator: {str(e)}")

    @task(10)
    def crawl_and_validate_images(self):
        """Main task - crawl pages and validate images"""
        if not self.pages_to_check:
            return
            
        current_page = self.pages_to_check.popleft()
        
        if current_page in self.visited_pages:
            return
            
        self.visited_pages.add(current_page)
        
        try:
            response = self.client.get(current_page, name=f"CRAWL: {current_page}")
            
            if response.status_code == 200:
                self._find_and_validate_images(current_page, response.text)
                self._find_more_pages(current_page, response.text)
            else:
                print(f"Cannot access page: {current_page} (Status: {response.status_code})")
                
        except Exception as e:
            print(f"Error crawling {current_page}: {str(e)}")

    def _find_and_validate_images(self, page_url, html_content):
        """Find all images on page and validate them"""
        # Find all img tags
        img_pattern = r'<img[^>]*>'
        img_tags = re.findall(img_pattern, html_content, re.IGNORECASE)
        
        for img_tag in img_tags:
            self._validate_single_image(page_url, img_tag)

    def _validate_single_image(self, page_url, img_tag):
        """Validate a single image tag"""
        self.total_images_checked += 1
        
        # Extract src and alt attributes
        src = self._extract_attribute(img_tag, 'src')
        alt = self._extract_attribute(img_tag, 'alt')
        
        # Skip if no src or data URL
        if not src or src.startswith('data:'):
            return
            
        # Validate alt text
        self._check_alt_text(page_url, src, alt)
        
        # Validate image accessibility and size
        self._check_image_url(page_url, src)

    def _check_alt_text(self, page_url, src, alt):
        """Check alt text for accessibility compliance"""
        if alt is None:
            # Missing alt attribute
            self.missing_alt_text.append({
                'page': page_url,
                'image': src,
                'issue': 'Missing alt attribute'
            })
            print(f"❌ Missing alt text: {src} on {page_url}")
            
        elif alt.strip() == '':
            # Empty alt (decorative image - this is OK)
            pass
            
        elif len(alt) > self.MAX_ALT_TEXT_LENGTH:
            # Alt text too long
            self.missing_alt_text.append({
                'page': page_url,
                'image': src,
                'issue': f'Alt text too long ({len(alt)} chars): {alt[:50]}...'
            })
            print(f"⚠️ Alt text too long: {src} on {page_url}")
            
        elif alt.lower() in ['image', 'picture', 'photo', 'img']:
            # Generic alt text
            self.missing_alt_text.append({
                'page': page_url,
                'image': src,
                'issue': f'Generic alt text: {alt}'
            })
            print(f"⚠️ Generic alt text: {src} on {page_url}")

    def _check_image_url(self, page_url, src):
        """Check if image URL is accessible and not oversized"""
        # Normalize URL
        image_url = self._normalize_image_url(src)
        if not image_url:
            return
            
        try:
            with self.client.head(image_url, 
                                name=f"IMG: {image_url}", 
                                catch_response=True) as response:
                
                if response.status_code == 404:
                    self.broken_images.append({
                        'page': page_url,
                        'image': src,
                        'status': 404
                    })
                    print(f"❌ Broken image (404): {src} on {page_url}")
                    response.failure("Image 404")
                    
                elif response.status_code >= 400:
                    self.broken_images.append({
                        'page': page_url,
                        'image': src,
                        'status': response.status_code
                    })
                    print(f"❌ Image error ({response.status_code}): {src} on {page_url}")
                    response.failure(f"Image HTTP {response.status_code}")
                    
                else:
                    # Check image size
                    self._check_image_size(page_url, src, response.headers)
                    response.success()
                    
        except Exception as e:
            self.broken_images.append({
                'page': page_url,
                'image': src,
                'status': 0,
                'error': str(e)
            })
            print(f"❌ Image request failed: {src} - {str(e)}")

    def _check_image_size(self, page_url, src, headers):
        """Check if image exceeds size limit"""
        content_length = headers.get('Content-Length')
        if content_length:
            size_mb = int(content_length) / (1024 * 1024)
            
            if size_mb > self.MAX_IMAGE_SIZE_MB:
                self.oversized_images.append({
                    'page': page_url,
                    'image': src,
                    'size_mb': round(size_mb, 2)
                })
                print(f"⚠️ Oversized image ({size_mb:.2f}MB): {src} on {page_url}")

    def _normalize_image_url(self, src):
        """Convert image src to testable URL"""
        if src.startswith('/'):
            return src
        elif src.startswith('http'):
            # External image - skip for simple validation
            if urlparse(src).netloc != self.base_domain:
                return None
            return urlparse(src).path
        else:
            # Relative URL
            return '/' + src.lstrip('./')

    def _find_more_pages(self, current_page, html_content):
        """Find internal links to add to crawl queue"""
        if len(self.pages_to_check) > 20:  # Limit queue size
            return
            
        link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
        links = re.findall(link_pattern, html_content, re.IGNORECASE)
        
        for link in links:
            if self._is_internal_link(link) and link not in self.visited_pages:
                normalized_link = self._normalize_page_url(link)
                if normalized_link and normalized_link not in self.pages_to_check:
                    self.pages_to_check.append(normalized_link)

    def _is_internal_link(self, link):
        """Check if link is internal"""
        if link.startswith('#') or link.startswith('mailto:') or link.startswith('tel:'):
            return False
        if link.startswith('/') or not link.startswith('http'):
            return True
        if link.startswith('http'):
            return urlparse(link).netloc == self.base_domain
        return True

    def _normalize_page_url(self, link):
        """Normalize page URL"""
        try:
            if link.startswith('/'):
                return link
            elif link.startswith('http'):
                parsed = urlparse(link)
                if parsed.netloc == self.base_domain:
                    return parsed.path
                return None
            else:
                return '/' + link.lstrip('./')
        except Exception:
            return None

    def _extract_attribute(self, tag, attribute):
        """Extract attribute value from HTML tag"""
        # Lookbehind prevents matching e.g. data-src when extracting src
        pattern = rf'(?<![\w-]){attribute}\s*=\s*["\']([^"\']*)["\']'
        match = re.search(pattern, tag, re.IGNORECASE)
        return match.group(1) if match else None

    @task(1)
    def report_status(self):
        """Print current validation status"""
        if len(self.visited_pages) < 3:
            return
            
        print(f"STATUS: {len(self.visited_pages)} pages crawled, "
              f"{self.total_images_checked} images checked, "
              f"{len(self.broken_images)} broken, "
              f"{len(self.oversized_images)} oversized, "
              f"{len(self.missing_alt_text)} alt issues")

    def on_stop(self):
        """Final summary when test completes"""
        print("\n" + "="*50)
        print("IMAGE VALIDATION COMPLETE")
        print("="*50)
        print(f"Pages crawled: {len(self.visited_pages)}")
        print(f"Images checked: {self.total_images_checked}")
        print(f"Broken images: {len(self.broken_images)}")
        print(f"Oversized images: {len(self.oversized_images)}")
        print(f"Alt text issues: {len(self.missing_alt_text)}")
        
        if self.broken_images:
            print(f"\nBROKEN IMAGES ({len(self.broken_images)}):")
            print("-" * 40)
            for img in self.broken_images[:5]:  # Show first 5
                print(f"❌ {img['image']} (HTTP {img['status']}) on {img['page']}")
                
        if self.oversized_images:
            print(f"\nOVERSIZED IMAGES ({len(self.oversized_images)}):")
            print("-" * 40)
            for img in self.oversized_images[:5]:  # Show first 5
                print(f"⚠️ {img['image']} ({img['size_mb']}MB) on {img['page']}")
                
        if self.missing_alt_text:
            print(f"\nALT TEXT ISSUES ({len(self.missing_alt_text)}):")
            print("-" * 40)
            for img in self.missing_alt_text[:5]:  # Show first 5
                print(f"⚠️ {img['image']}: {img['issue']} on {img['page']}")

Comprehensive Image Validation

from locust import HttpUser, task, between
import json
import time
import re
from urllib.parse import urljoin, urlparse
from collections import defaultdict, deque
import requests

class ComprehensiveImageValidator(HttpUser):
    wait_time = between(0.5, 2)
    
    # CONFIGURATION - Easily modify these values
    MAX_IMAGE_SIZE_MB = 1.0  # Images larger than this will be flagged
    MAX_ALT_TEXT_LENGTH = 125  # Alt text longer than this will be flagged
    MIN_ALT_TEXT_LENGTH = 3   # Alt text shorter than this will be flagged
    CHECK_EXTERNAL_IMAGES = True  # Whether to validate external images
    EXTERNAL_CHECK_RATE_LIMIT = 5  # Seconds between external image checks per domain
    
    def on_start(self):
        """Initialize comprehensive image validation"""
        self.visited_pages = set()
        self.pages_to_check = deque(['/'])
        self.image_issues = []
        self.image_stats = defaultdict(int)
        self.large_images = []
        self.missing_alt_text = []
        self.broken_images = []
        self.external_domains_checked = {}
        self.base_domain = None
        
        # Initialize crawler
        self._initialize_comprehensive_validator()
        
    def _initialize_comprehensive_validator(self):
        """Initialize the comprehensive image validator"""
        homepage_response = self.client.get('/', name="INIT: Homepage Check")
        
        if homepage_response.status_code == 200:
            self.base_domain = urlparse(self.client.base_url).netloc
            print(f"Starting comprehensive image validation for: {self.base_domain}")
            print(f"Configuration: Max size {self.MAX_IMAGE_SIZE_MB}MB, "
                  f"External images: {self.CHECK_EXTERNAL_IMAGES}")
        else:
            print(f"Failed to access homepage: {homepage_response.status_code}")

    @task(5)
    def crawl_and_validate_all_images(self):
        """Main crawling task to validate all image types"""
        if not self.pages_to_check:
            return
            
        current_page = self.pages_to_check.popleft()
        
        if current_page in self.visited_pages:
            return
            
        self.visited_pages.add(current_page)
        
        # Get the page content
        response = self.client.get(current_page, name=f"CRAWL: {current_page}")
        
        if response.status_code == 200:
            self._extract_and_validate_all_images(current_page, response.text)
            self._find_more_pages_to_crawl(current_page, response.text)
        else:
            print(f"Cannot access page for image validation: {current_page}")

    def _extract_and_validate_all_images(self, page_url, html_content):
        """Extract all types of images from HTML and validate them"""
        # Find all image-related elements
        image_patterns = [
            (r'<img[^>]*>', 'img'),
            (r'<picture[^>]*>.*?</picture>', 'picture'),
            (r'<source[^>]*>', 'source'),
            (r'<video[^>]*>', 'video_poster'),  # poster attribute extracted later
            (r'<link[^>]*rel=["\']icon["\'][^>]*>', 'favicon'),
        ]
        
        for pattern, element_type in image_patterns:
            elements = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
            
            for element in elements:
                if element_type == 'img':
                    self._validate_img_tag(page_url, element)
                elif element_type == 'picture':
                    self._validate_picture_element(page_url, element)
                elif element_type == 'source':
                    self._validate_source_element(page_url, element)
                elif element_type == 'video_poster':
                    self._validate_video_poster(page_url, element)
                elif element_type == 'favicon':
                    self._validate_favicon(page_url, element)

    def _validate_img_tag(self, page_url, img_tag):
        """Comprehensive validation of img tag"""
        self.image_stats['total_images'] += 1
        
        # Extract all relevant attributes
        src = self._extract_attribute(img_tag, 'src')
        alt = self._extract_attribute(img_tag, 'alt')
        width = self._extract_attribute(img_tag, 'width')
        height = self._extract_attribute(img_tag, 'height')
        loading = self._extract_attribute(img_tag, 'loading')
        srcset = self._extract_attribute(img_tag, 'srcset')
        
        # Skip data URLs and placeholders
        if not src or src.startswith('data:') or 'placeholder' in src.lower():
            if src and src.startswith('data:'):
                self.image_stats['data_urls'] += 1
            return
            
        # Comprehensive alt text validation
        self._comprehensive_alt_validation(page_url, img_tag, alt, src)
        
        # Validate image URL and accessibility
        self._comprehensive_image_url_validation(page_url, src)
        
        # Check for responsive images
        if srcset:
            self._validate_srcset(page_url, srcset)
            self.image_stats['responsive_images'] += 1
            
        # Check lazy loading
        if loading and loading.lower() == 'lazy':
            self.image_stats['lazy_loaded_images'] += 1
        
        # Validate dimensions for layout stability
        self._validate_image_dimensions(page_url, img_tag, width, height)

    def _comprehensive_alt_validation(self, page_url, img_tag, alt, src):
        """Comprehensive alt text validation"""
        if alt is None:
            self._record_image_issue(page_url, 'HIGH', 
                                   f'Missing alt attribute for image: {src}', img_tag)
            self.missing_alt_text.append({
                'page': page_url,
                'src': src,
                'issue': 'missing_alt_attribute'
            })
        elif alt.strip() == '':
            # Empty alt text (decorative image)
            self.image_stats['decorative_images'] += 1
        elif len(alt) < self.MIN_ALT_TEXT_LENGTH:
            self._record_image_issue(page_url, 'MEDIUM', 
                                   f'Very short alt text: "{alt}" for {src}', img_tag)
        elif len(alt) > self.MAX_ALT_TEXT_LENGTH:
            self._record_image_issue(page_url, 'LOW', 
                                   f'Alt text too long ({len(alt)} chars): {src}', img_tag)
        elif alt.lower() in ['image', 'picture', 'photo', 'img', 'logo']:
            self._record_image_issue(page_url, 'MEDIUM', 
                                   f'Generic alt text: "{alt}" for {src}', img_tag)
        else:
            self.image_stats['valid_alt_text'] += 1

    def _comprehensive_image_url_validation(self, page_url, src):
        """Comprehensive image URL validation"""
        # Determine if internal or external
        is_internal = self._is_internal_image(src)
        
        if is_internal:
            self._validate_internal_image(page_url, src)
        elif self.CHECK_EXTERNAL_IMAGES:
            self._validate_external_image(page_url, src)
        else:
            self.image_stats['external_images_skipped'] += 1

    def _validate_internal_image(self, page_url, src):
        """Validate internal image URLs"""
        self.image_stats['internal_images'] += 1
        image_url = self._normalize_internal_image_url(src)
        
        if not image_url:
            self._record_image_issue(page_url, 'HIGH', f'Invalid internal image URL: {src}', '')
            return
            
        with self.client.head(image_url,
                            name="validate_internal_image",
                            catch_response=True) as response:
            
            self._process_image_response(page_url, src, response, is_internal=True)

    def _validate_external_image(self, page_url, src):
        """Validate external image URLs with rate limiting"""
        self.image_stats['external_images'] += 1
        
        domain = urlparse(src).netloc
        current_time = time.time()
        
        # Rate limit external checks
        if domain in self.external_domains_checked:
            last_check = self.external_domains_checked[domain].get('last_check', 0)
            if current_time - last_check < self.EXTERNAL_CHECK_RATE_LIMIT:
                return
                
        self.external_domains_checked[domain] = {'last_check': current_time}
        
        try:
            response = requests.head(src, timeout=10, allow_redirects=True)
            self._process_external_image_response(page_url, src, response)
        except requests.exceptions.RequestException as e:
            self._record_image_issue(page_url, 'MEDIUM', 
                                   f'External image failed: {src} - {str(e)}', '')

    def _process_image_response(self, page_url, src, response, is_internal=True):
        """Process image response and check for issues"""
        if response.status_code == 404:
            severity = 'CRITICAL' if is_internal else 'HIGH'
            self._record_image_issue(page_url, severity, f'Image not found (404): {src}', '')
            self.broken_images.append({
                'page': page_url,
                'src': src,
                'status_code': 404,
                'type': 'internal' if is_internal else 'external'
            })
            response.failure("Image 404")
            
        elif response.status_code >= 400:
            severity = 'HIGH' if is_internal else 'MEDIUM'
            self._record_image_issue(page_url, severity, 
                                   f'Image error ({response.status_code}): {src}', '')
            response.failure(f"Image HTTP {response.status_code}")
            
        else:
            self.image_stats['accessible_images'] += 1
            self._check_image_size_from_headers(page_url, src, response.headers)
            response.success()

    def _process_external_image_response(self, page_url, src, response):
        """Process external image response"""
        if response.status_code == 404:
            self._record_image_issue(page_url, 'HIGH', f'External image not found (404): {src}', '')
        elif response.status_code >= 400:
            self._record_image_issue(page_url, 'MEDIUM', 
                                   f'External image error ({response.status_code}): {src}', '')
        else:
            self.image_stats['valid_external_images'] += 1
            self._check_image_size_from_headers(page_url, src, response.headers)

    def _check_image_size_from_headers(self, page_url, src, headers):
        """Check image size from Content-Length header"""
        content_length = headers.get('Content-Length')
        if content_length:
            size_mb = int(content_length) / (1024 * 1024)
            if size_mb > self.MAX_IMAGE_SIZE_MB:
                self._record_image_issue(page_url, 'MEDIUM', 
                                       f'Large image ({size_mb:.2f}MB): {src}', '')
                self.large_images.append({
                    'page': page_url,
                    'src': src,
                    'size_mb': size_mb
                })

    def _validate_srcset(self, page_url, srcset):
        """Validate srcset for responsive images"""
        srcset_entries = [entry.strip() for entry in srcset.split(',')]
        
        for entry in srcset_entries:
            parts = entry.split()
            if len(parts) >= 2:
                src = parts[0]
                descriptor = parts[1]
                
                # Validate each srcset image
                if self._is_internal_image(src):
                    self._validate_internal_image(page_url, src)
                
                # Check descriptor format
                if not (descriptor.endswith('w') or descriptor.endswith('x')):
                    self._record_image_issue(page_url, 'LOW', 
                                           f'Invalid srcset descriptor: {descriptor}', '')

    def _validate_picture_element(self, page_url, picture_element):
        """Validate picture element for responsive images"""
        self.image_stats['picture_elements'] += 1
        
        # Extract source elements and img tag from picture
        sources = re.findall(r'<source[^>]*>', picture_element, re.IGNORECASE)
        img_tags = re.findall(r'<img[^>]*>', picture_element, re.IGNORECASE)
        
        for source in sources:
            self._validate_source_element(page_url, source)
        
        for img_tag in img_tags:
            self._validate_img_tag(page_url, img_tag)

    def _validate_source_element(self, page_url, source_element):
        """Validate source element"""
        srcset = self._extract_attribute(source_element, 'srcset')
        if srcset:
            self._validate_srcset(page_url, srcset)

    def _validate_video_poster(self, page_url, video_tag):
        """Validate video poster images"""
        poster = self._extract_attribute(video_tag, 'poster')
        if poster:
            self.image_stats['video_posters'] += 1
            if self._is_internal_image(poster):
                self._validate_internal_image(page_url, poster)

    def _validate_favicon(self, page_url, link_tag):
        """Validate favicon and icon links"""
        href = self._extract_attribute(link_tag, 'href')
        if href:
            self.image_stats['favicons'] += 1
            if self._is_internal_image(href):
                self._validate_internal_image(page_url, href)

    def _validate_image_dimensions(self, page_url, img_tag, width, height):
        """Validate image dimensions for layout stability"""
        if not width and not height:
            self._record_image_issue(page_url, 'LOW', 
                                   'Missing width/height attributes (may cause layout shift)', img_tag)
        elif width and not height:
            self._record_image_issue(page_url, 'LOW', 'Missing height attribute', img_tag)
        elif height and not width:
            self._record_image_issue(page_url, 'LOW', 'Missing width attribute', img_tag)
        else:
            self.image_stats['dimensioned_images'] += 1

    def _is_internal_image(self, src):
        """Check if image is internal"""
        if src.startswith('/'):
            return True
        if src.startswith('http'):
            return urlparse(src).netloc == self.base_domain
        return True  # Relative URLs are internal

    def _normalize_internal_image_url(self, src):
        """Normalize internal image URL"""
        try:
            if src.startswith('/'):
                return src
            elif src.startswith('http'):
                parsed = urlparse(src)
                if parsed.netloc == self.base_domain:
                    return parsed.path
                return None
            else:
                return '/' + src.lstrip('./')
        except Exception:
            return None

    def _find_more_pages_to_crawl(self, current_page, html_content):
        """Find more internal pages to crawl"""
        if len(self.pages_to_check) > 30:  # Limit queue size
            return
            
        link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
        links = re.findall(link_pattern, html_content, re.IGNORECASE)
        
        for link in links:
            if self._is_internal_link(link) and link not in self.visited_pages:
                normalized_link = self._normalize_page_url(link)
                if normalized_link and normalized_link not in self.pages_to_check:
                    self.pages_to_check.append(normalized_link)

    def _is_internal_link(self, link):
        """Check if link is internal"""
        if link.startswith('#') or link.startswith('mailto:') or link.startswith('tel:'):
            return False
        if link.startswith('/') or not link.startswith('http'):
            return True
        if link.startswith('http'):
            return urlparse(link).netloc == self.base_domain
        return True

    def _normalize_page_url(self, link):
        """Normalize page URL"""
        try:
            if link.startswith('/'):
                return link
            elif link.startswith('http'):
                parsed = urlparse(link)
                if parsed.netloc == self.base_domain:
                    return parsed.path
                return None
            else:
                return '/' + link.lstrip('./')
        except Exception:
            return None

    def _extract_attribute(self, tag, attribute):
        """Extract attribute value from HTML tag"""
        # Lookbehind prevents matching e.g. data-src when extracting src
        pattern = rf'(?<![\w-]){attribute}\s*=\s*["\']([^"\']*)["\']'
        match = re.search(pattern, tag, re.IGNORECASE)
        return match.group(1) if match else None

    def _record_image_issue(self, page_url, severity, description, element):
        """Record an image issue with details"""
        issue = {
            'page': page_url,
            'severity': severity,
            'description': description,
            'element': element[:200] if element else '',
            'timestamp': time.time()
        }
        
        self.image_issues.append(issue)
        self.image_stats['total_issues'] += 1
        
        print(f"IMAGE ISSUE [{severity}]: {description} (on {page_url})")

    @task(1)
    def generate_comprehensive_report(self):
        """Generate comprehensive image validation report"""
        if len(self.image_issues) == 0 and len(self.visited_pages) < 3:
            return
            
        report = {
            'timestamp': time.time(),
            'domain': self.base_domain,
            'configuration': {
                'max_image_size_mb': self.MAX_IMAGE_SIZE_MB,
                'check_external_images': self.CHECK_EXTERNAL_IMAGES
            },
            'pages_crawled': len(self.visited_pages),
            'total_image_issues': len(self.image_issues),
            'image_statistics': dict(self.image_stats),
            'issues_by_severity': self._group_issues_by_severity(),
            'accessibility_issues': len(self.missing_alt_text),
            'broken_images': len(self.broken_images),
            'large_images': len(self.large_images)
        }
        
        self.client.post('/api/qa/comprehensive-image-report',
                        json=report,
                        name="submit_comprehensive_image_report")
        
        print(f"COMPREHENSIVE REPORT: {len(self.image_issues)} issues across {len(self.visited_pages)} pages")

    def _group_issues_by_severity(self):
        """Group issues by severity level"""
        severity_groups = defaultdict(list)
        for issue in self.image_issues:
            severity_groups[issue['severity']].append(issue)
        return dict(severity_groups)

    def on_stop(self):
        """Final comprehensive report"""
        print("\n" + "="*60)
        print("COMPREHENSIVE IMAGE VALIDATION COMPLETE")
        print("="*60)
        print(f"Configuration: Max size {self.MAX_IMAGE_SIZE_MB}MB, External: {self.CHECK_EXTERNAL_IMAGES}")
        print(f"Pages crawled: {len(self.visited_pages)}")
        print(f"Total images: {self.image_stats['total_images']}")
        print(f"Issues found: {len(self.image_issues)}")
        print(f"Broken images: {len(self.broken_images)}")
        print(f"Large images: {len(self.large_images)}")
        print(f"Alt text issues: {len(self.missing_alt_text)}")
        
        if self.image_issues:
            print(f"\nTOP ISSUES BY SEVERITY:")
            severity_counts = defaultdict(int)
            for issue in self.image_issues:
                severity_counts[issue['severity']] += 1
            
            for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
                if severity_counts[severity] > 0:
                    print(f"  {severity}: {severity_counts[severity]} issues")

Advanced Image Testing

from locust import HttpUser, task, between
import re
import time

class AdvancedImageTester(HttpUser):
    wait_time = between(1, 3)
    
    # CONFIGURATION
    MODERN_FORMATS_TO_TEST = ['webp', 'avif', 'heic']  # Modern formats to check support for
    CDN_DOMAINS = ['cdn.example.com', 'images.example.com']  # CDN domains to validate
    
    @task(3)
    def test_modern_image_format_support(self):
        """Test support for modern image formats like WebP, AVIF"""
        test_images = [
            '/images/hero.jpg',
            '/images/product.png',
            '/images/banner.jpg'
        ]
        
        for image_path in test_images:
            for format_type in self.MODERN_FORMATS_TO_TEST:
                # Test with Accept header for modern format
                accept_header = f'image/{format_type},image/*,*/*;q=0.8'
                
                with self.client.get(image_path,
                                   headers={'Accept': accept_header},
                                   name=f"test_{format_type}_support",
                                   catch_response=True) as response:
                    
                    if response.status_code == 200:
                        content_type = response.headers.get('Content-Type', '')
                        if format_type in content_type:
                            print(f"✅ {format_type.upper()} supported for: {image_path}")
                            response.success()
                        else:
                            print(f"⚠️ {format_type.upper()} not served for: {image_path}")
                            response.success()  # Not an error, just not optimized
                    else:
                        response.failure(f"Image not accessible for {format_type} test")

    @task(2)
    def test_image_cdn_performance(self):
        """Test image CDN performance and caching"""
        cdn_images = [
            '/images/cached-hero.jpg',
            '/images/optimized-banner.webp',
            '/static/img/logo.svg'
        ]
        
        for image_path in cdn_images:
            # First request - should be slow (cache miss)
            start_time = time.time()
            response1 = self.client.get(image_path, name="image_cdn_first_request")
            first_request_time = time.time() - start_time
            
            if response1.status_code == 200:
                # Check for CDN headers
                self._validate_cdn_headers(image_path, response1.headers)
                
                # Second request - should be faster (cache hit)
                start_time = time.time()
                response2 = self.client.get(image_path, name="image_cdn_cached_request")
                second_request_time = time.time() - start_time
                
                if response2.status_code == 200:
                    # Compare response times
                    if second_request_time < first_request_time * 0.8:
                        print(f"✅ CDN caching effective for: {image_path}")
                    else:
                        print(f"⚠️ CDN caching may not be working for: {image_path}")

    def _validate_cdn_headers(self, image_path, headers):
        """Validate CDN and caching headers"""
        cache_control = headers.get('Cache-Control', '')
        etag = headers.get('ETag', '')
        expires = headers.get('Expires', '')
        cdn_cache = headers.get('X-Cache', '') or headers.get('CF-Cache-Status', '')
        
        issues = []
        
        if not cache_control and not expires:
            issues.append("Missing cache headers")
            
        if not etag:
            issues.append("Missing ETag header")
            
        if cdn_cache and 'miss' in cdn_cache.lower():
            issues.append("CDN cache miss")
            
        if issues:
            print(f"⚠️ CDN issues for {image_path}: {', '.join(issues)}")
        else:
            print(f"✅ CDN headers valid for: {image_path}")

    @task(2)
    def test_responsive_image_optimization(self):
        """Test responsive image implementations"""
        # Test pages likely to have responsive images
        responsive_pages = [
            '/',
            '/blog',
            '/products',
            '/gallery'
        ]
        
        for page in responsive_pages:
            response = self.client.get(page, name="check_responsive_images_page")
            
            if response.status_code == 200:
                self._analyze_responsive_images(page, response.text)

    def _analyze_responsive_images(self, page_url, html_content):
        """Analyze responsive image implementations on page"""
        
        # Check for picture elements
        picture_count = len(re.findall(r'<picture[^>]*>', html_content, re.IGNORECASE))
        
        # Check for srcset usage
        srcset_count = len(re.findall(r'srcset=["\']', html_content, re.IGNORECASE))
        
        # Check for sizes attribute
        sizes_count = len(re.findall(r'sizes=["\']', html_content, re.IGNORECASE))
        
        print(f"RESPONSIVE ANALYSIS for {page_url}:")
        print(f"  Picture elements: {picture_count}")
        print(f"  Srcset usage: {srcset_count}")
        print(f"  Sizes attributes: {sizes_count}")
        
        # Basic optimization check
        if srcset_count > 0 or picture_count > 0:
            print(f"✅ Responsive images implemented on {page_url}")
        else:
            print(f"⚠️ No responsive images detected on {page_url}")

    @task(1)
    def test_image_accessibility_features(self):
        """Test advanced accessibility features"""
        # Test for common accessibility patterns
        accessibility_tests = [
            ('/images/chart.png', 'Should have descriptive alt text for charts'),
            ('/images/infographic.jpg', 'Should have detailed alt text for infographics'),
            ('/images/decorative-border.png', 'Should have empty alt for decorative images')
        ]
        
        for image_path, accessibility_note in accessibility_tests:
            response = self.client.get('/', name="accessibility_context_check")
            
            if response.status_code == 200:
                # Check if image exists in page context
                if image_path in response.text:
                    print(f"ACCESSIBILITY CHECK: {image_path} - {accessibility_note}")

    @task(1)  
    def test_image_seo_optimization(self):
        """Test SEO aspects of images"""
        # Check for structured data with images
        response = self.client.get('/', name="seo_structured_data_check")
        
        if response.status_code == 200:
            html_content = response.text
            
            # Check for JSON-LD with images
            json_ld_scripts = re.findall(r'<script[^>]*type=["\']application/ld\+json["\'][^>]*>(.*?)</script>', 
                                       html_content, re.IGNORECASE | re.DOTALL)
            
            image_in_structured_data = False
            for script in json_ld_scripts:
                if 'image' in script.lower():
                    image_in_structured_data = True
                    break
                    
            if image_in_structured_data:
                print("✅ Images found in structured data")
            else:
                print("⚠️ No images in structured data (SEO opportunity)")

    @task(1)
    def generate_advanced_performance_report(self):
        """Generate advanced image performance report"""
        advanced_report = {
            'timestamp': time.time(),
            'modern_format_support': {
                'webp_tested': True,
                'avif_tested': True,
                'optimization_recommendations': []
            },
            'cdn_performance': {
                'caching_effective': True,
                'cdn_domains_checked': self.CDN_DOMAINS
            },
            'responsive_images': {
                'implementation_detected': True,
                'picture_elements_found': True
            },
            'accessibility_score': 85,  # Would be calculated from actual checks
            'seo_optimization': {
                'structured_data': True,
                'alt_text_quality': 'good'
            }
        }
        
        self.client.post('/api/qa/advanced-image-performance-report',
                        json=advanced_report,
                        name="submit_advanced_performance_report")
        
        print("ADVANCED REPORT: Performance and optimization analysis complete")

Key Validation Features

  1. Simple Version: Basic broken images, alt text, and configurable size limits
  2. Comprehensive Version: Full validation including external images, responsive images, and detailed reporting
  3. Advanced Version: Modern format support, CDN performance, SEO optimization, and accessibility testing
  4. Configurable Parameters: Easy-to-modify size limits and validation settings at the top of each script (see the example after this list)
  5. Progressive Complexity: Start simple, add features as needed
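
For example, tightening the budget for an image-heavy site only requires changing the class-level constants; the values below are illustrative:

from locust import HttpUser, between

class SimpleImageValidator(HttpUser):
    wait_time = between(0.5, 1)

    # Stricter limits for an image-heavy marketing site (illustrative values)
    MAX_IMAGE_SIZE_MB = 0.5    # Flag anything over half a megabyte
    MAX_ALT_TEXT_LENGTH = 100  # Flag alt text over 100 characters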

This guide provides three levels of image validation to match your testing needs and technical requirements.
