#include "http_validator.h"

#include <algorithm>
#include <array>
#include <cctype>
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

namespace mosis {

namespace {

// Returns an ASCII-lowercased copy of `text`.
std::string ToLowerCopy(const std::string& text) {
    std::string lowered = text;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return lowered;
}

// Removes a single trailing '.' (the DNS root label) so that "localhost."
// is compared the same way as "localhost".
std::string StripTrailingDot(const std::string& host) {
    if (!host.empty() && host.back() == '.') {
        return host.substr(0, host.size() - 1);
    }
    return host;
}

// Removes a leading '[' and a trailing ']' from an IPv6 literal, if present.
std::string StripBrackets(const std::string& host) {
    std::string addr = host;
    if (!addr.empty() && addr.front() == '[') addr = addr.substr(1);
    if (!addr.empty() && addr.back() == ']') addr.pop_back();
    return addr;
}

// Strictly parses a canonical dotted-quad ("a.b.c.d", each part 1-3 decimal
// digits with value 0-255). Returns false for anything else.
bool ParseDottedQuad(const std::string& text, std::array<int, 4>& octets) {
    std::size_t pos = 0;
    for (int index = 0; index < 4; ++index) {
        const std::size_t end = (index < 3) ? text.find('.', pos) : text.size();
        if (end == std::string::npos) return false;

        const std::size_t digits = end - pos;
        if (digits == 0 || digits > 3) return false;

        int value = 0;
        for (std::size_t k = pos; k < end; ++k) {
            if (!std::isdigit(static_cast<unsigned char>(text[k]))) return false;
            value = value * 10 + (text[k] - '0');
        }
        if (value > 255) return false;

        octets[static_cast<std::size_t>(index)] = value;
        pos = end + 1;
    }
    return true;
}

// Range test shared by the IPv4 check and the IPv4-mapped IPv6 check.
// True for 0.0.0.0, 127/8, 10/8, 172.16/12, 192.168/16 and 169.254/16.
bool IsPrivateIPv4Octets(int a, int b, int c, int d) {
    if (a == 0 && b == 0 && c == 0 && d == 0) return true;  // 0.0.0.0 - all interfaces
    if (a == 127) return true;                              // 127.0.0.0/8 - loopback
    if (a == 10) return true;                               // 10.0.0.0/8
    if (a == 172 && b >= 16 && b <= 31) return true;        // 172.16.0.0/12
    if (a == 192 && b == 168) return true;                  // 192.168.0.0/16
    if (a == 169 && b == 254) return true;                  // 169.254.0.0/16 - link-local
    return false;
}

// Parses a textual IPv6 address (no brackets, no zone id) into 16 bytes in
// network order. Supports "::" compression and a trailing embedded dotted-quad.
// Returns false when the text is not a well-formed address.
bool ParseIPv6(const std::string& text, std::array<unsigned char, 16>& out) {
    const std::size_t len = text.size();
    if (len < 2) return false;

    std::array<unsigned, 8> groups{};
    int count = 0;   // groups parsed so far
    int gap = -1;    // group index at which "::" appeared, or -1
    std::size_t pos = 0;

    if (text[0] == ':') {
        if (text[1] != ':') return false;  // a single leading ':' is malformed
        gap = 0;
        pos = 2;
    }

    while (pos < len) {
        if (count >= 8) return false;

        std::size_t end = text.find(':', pos);
        if (end == std::string::npos) end = len;
        const std::string token = text.substr(pos, end - pos);
        if (token.empty()) return false;

        if (token.find('.') != std::string::npos) {
            // Embedded IPv4 must be the final token and occupies two groups.
            std::array<int, 4> quad{};
            if (end != len || count > 6 || !ParseDottedQuad(token, quad)) return false;
            groups[static_cast<std::size_t>(count++)] =
                static_cast<unsigned>((quad[0] << 8) | quad[1]);
            groups[static_cast<std::size_t>(count++)] =
                static_cast<unsigned>((quad[2] << 8) | quad[3]);
            break;
        }

        if (token.size() > 4) return false;
        unsigned value = 0;
        for (char ch : token) {
            const unsigned char uc = static_cast<unsigned char>(ch);
            if (!std::isxdigit(uc)) return false;
            const int digit = std::isdigit(uc) ? uc - '0' : std::tolower(uc) - 'a' + 10;
            value = value * 16u + static_cast<unsigned>(digit);
        }
        groups[static_cast<std::size_t>(count++)] = value;

        if (end == len) break;
        if (end + 1 < len && text[end + 1] == ':') {
            if (gap >= 0) return false;  // only one "::" is allowed
            gap = count;
            pos = end + 2;
        } else {
            pos = end + 1;
            if (pos == len) return false;  // trailing single ':'
        }
    }

    // Without "::" exactly eight groups are required; with it, at most seven.
    if (gap < 0 ? count != 8 : count >= 8) return false;

    std::array<unsigned, 8> full{};
    const int head = (gap < 0) ? count : gap;
    const int tail = count - head;
    for (int k = 0; k < head; ++k) {
        full[static_cast<std::size_t>(k)] = groups[static_cast<std::size_t>(k)];
    }
    for (int k = 0; k < tail; ++k) {
        full[static_cast<std::size_t>(8 - tail + k)] = groups[static_cast<std::size_t>(head + k)];
    }
    for (std::size_t k = 0; k < 8; ++k) {
        out[2 * k] = static_cast<unsigned char>(full[k] >> 8);
        out[2 * k + 1] = static_cast<unsigned char>(full[k] & 0xffu);
    }
    return true;
}

// Normalises an IPv6 host (brackets removed, lowercased, zone id dropped) and
// parses it to bytes. Returns false when it is not a well-formed address.
bool ParseIPv6Host(const std::string& host, std::array<unsigned char, 16>& out) {
    const std::string addr = ToLowerCopy(StripBrackets(host));
    return ParseIPv6(addr.substr(0, addr.find('%')), out);
}

// True when bytes 0..11 are the IPv4-mapped prefix ::ffff:0:0/96.
bool IsIPv4Mapped(const std::array<unsigned char, 16>& bytes) {
    const bool zero_prefix =
        std::all_of(bytes.begin(), bytes.begin() + 10, [](unsigned char b) { return b == 0; });
    return zero_prefix && bytes[10] == 0xff && bytes[11] == 0xff;
}

// True when the last DNS label of `host` is all decimal digits or a "0x" hex
// literal. Such hosts are interpreted as (legacy-format) IPv4 addresses by
// many URL parsers, e.g. "2130706433", "127.1" or "0x7f.0.0.1".
bool HasNumericLastLabel(const std::string& host) {
    const std::string name = StripTrailingDot(host);
    if (name.empty()) return false;

    const std::size_t dot = name.rfind('.');
    const std::string label =
        ToLowerCopy(dot == std::string::npos ? name : name.substr(dot + 1));
    if (label.empty()) return false;

    if (label.size() >= 2 && label[0] == '0' && label[1] == 'x') {
        return std::all_of(label.begin() + 2, label.end(),
                           [](unsigned char c) { return std::isxdigit(c) != 0; });
    }
    return std::all_of(label.begin(), label.end(),
                       [](unsigned char c) { return std::isdigit(c) != 0; });
}

// Parses a port made only of 1-5 decimal digits with value <= 65535.
bool ParsePort(const std::string& text, unsigned& port) {
    if (text.empty() || text.size() > 5) return false;
    unsigned value = 0;
    for (char ch : text) {
        if (!std::isdigit(static_cast<unsigned char>(ch))) return false;
        value = value * 10u + static_cast<unsigned>(ch - '0');
    }
    if (value > 65535u) return false;
    port = value;
    return true;
}

}  // namespace

// Creates a validator with no domain restrictions.
HttpValidator::HttpValidator()
    : m_domain_restrictions_enabled(false) {
}

// Replaces the allow-list. Restrictions are enabled only when the list is
// non-empty.
void HttpValidator::SetAllowedDomains(const std::vector<std::string>& domains) {
    m_allowed_domains = domains;
    m_domain_restrictions_enabled = !domains.empty();
}

// Removes the allow-list and disables domain restrictions.
void HttpValidator::ClearDomainRestrictions() {
    m_allowed_domains.clear();
    m_domain_restrictions_enabled = false;
}

// Parses and validates `url`.
//
// Returns the parsed URL when it is acceptable. Otherwise returns
// std::nullopt and stores a human-readable reason in `error`.
// Checks, in order: URL syntax, scheme (https/wss only), localhost names,
// metadata hostnames, blocked IP literals, ambiguous numeric hosts, and the
// domain allow-list (when enabled).
auto HttpValidator::Validate(const std::string& url, std::string& error)
    -> std::optional<ParsedUrl> {
    // Parse URL
    auto parsed = ParseUrl(url);
    if (!parsed) {
        error = "Invalid URL format";
        return std::nullopt;
    }

    // Must be HTTPS or WSS
    if (parsed->scheme != "https" && parsed->scheme != "wss") {
        error = "HTTPS or WSS required, got: " + parsed->scheme;
        return std::nullopt;
    }

    // Check for localhost names
    if (IsLocalhostName(parsed->host)) {
        error = "localhost blocked for security";
        return std::nullopt;
    }

    // Check for metadata hostnames
    if (IsMetadataHostname(parsed->host)) {
        error = "Cloud metadata hostname blocked for security";
        return std::nullopt;
    }

    // Check if it's an IP address and validate
    if (parsed->is_ip_address) {
        if (IsBlockedIP(parsed->host)) {
            error = "IP address blocked: private, localhost, or metadata endpoint";
            return std::nullopt;
        }
    } else if (HasNumericLastLabel(parsed->host)) {
        // Hosts such as "2130706433", "127.1", "0x7f.0.0.1" or "127.0.0.1."
        // are not canonical dotted-quad, yet many clients resolve them to an
        // IPv4 address, which would sidestep the IP checks above.
        error = "Numeric host must be a canonical dotted-quad IPv4 address: " + parsed->host;
        return std::nullopt;
    }

    // Check domain whitelist
    if (m_domain_restrictions_enabled && !IsDomainAllowed(parsed->host)) {
        error = "Domain not in allowed list: " + parsed->host;
        return std::nullopt;
    }

    return parsed;
}

// True when `host` is a canonical dotted-quad IPv4 address: exactly four
// decimal parts, each 1-3 digits with value 0-255.
bool HttpValidator::IsIPv4Address(const std::string& host) {
    std::array<int, 4> octets{};
    return ParseDottedQuad(host, octets);
}

// Heuristic IPv6 test: anything in [brackets] counts, as does an unbracketed
// string with at least two colons and at most three dots.
bool HttpValidator::IsIPv6Address(const std::string& host) {
    if (host.length() < 2) return false;

    // IPv6 addresses in URLs are enclosed in brackets: [::1]
    if (host.front() == '[' && host.back() == ']') {
        return true;  // Simplified check - bracket notation means IPv6
    }

    // Raw IPv6: multiple colons, optionally an embedded dotted-quad.
    const auto colons = std::count(host.begin(), host.end(), ':');
    const auto dots = std::count(host.begin(), host.end(), '.');
    return colons >= 2 && dots <= 3;
}

// True when `ip` (parsed leniently with sscanf "%d.%d.%d.%d") lies in
// 0.0.0.0, loopback, RFC 1918 private, or link-local space. Returns false
// when four numbers cannot be read.
bool HttpValidator::IsPrivateIPv4(const std::string& ip) {
    std::array<int, 4> octets{};
    if (std::sscanf(ip.c_str(), "%d.%d.%d.%d",
                    &octets[0], &octets[1], &octets[2], &octets[3]) != 4) {
        return false;
    }
    return IsPrivateIPv4Octets(octets[0], octets[1], octets[2], octets[3]);
}

// True when `ip` (brackets optional) is unspecified, loopback, unique-local
// (fc00::/7), link-local (fe80::/10), or an IPv4-mapped address whose
// embedded IPv4 is private.
bool HttpValidator::IsPrivateIPv6(const std::string& ip) {
    const std::string addr = ToLowerCopy(StripBrackets(ip));

    // Textual checks, kept so that malformed-but-suspicious input stays blocked.
    if (addr == "::1" || addr == "0:0:0:0:0:0:0:1") return true;  // loopback
    if (addr == "::" || addr == "0:0:0:0:0:0:0:0") return true;   // unspecified
    if (addr.length() >= 2 && addr[0] == 'f' && (addr[1] == 'c' || addr[1] == 'd')) {
        return true;  // fc00::/7 - unique local
    }
    if (addr.rfind("fe8", 0) == 0 || addr.rfind("fe9", 0) == 0 ||
        addr.rfind("fea", 0) == 0 || addr.rfind("feb", 0) == 0) {
        return true;  // fe80::/10 - link-local
    }

    // Byte-level checks catch alternative spellings ("0::1", "::0001",
    // "::ffff:127.0.0.1", "::ffff:7f00:1") that the textual checks miss.
    std::array<unsigned char, 16> bytes{};
    if (!ParseIPv6(addr.substr(0, addr.find('%')), bytes)) {
        return false;
    }

    const bool first_15_zero =
        std::all_of(bytes.begin(), bytes.begin() + 15, [](unsigned char b) { return b == 0; });
    if (first_15_zero && bytes[15] <= 1) return true;             // :: or ::1
    if ((bytes[0] & 0xfe) == 0xfc) return true;                   // fc00::/7
    if (bytes[0] == 0xfe && (bytes[1] & 0xc0) == 0x80) return true;  // fe80::/10
    if (IsIPv4Mapped(bytes)) {
        return IsPrivateIPv4Octets(bytes[12], bytes[13], bytes[14], bytes[15]);
    }
    return false;
}

// True when `host` is an IPv4 loopback address (127.0.0.0/8), the IPv6
// loopback ::1 in any spelling, or an IPv4-mapped IPv6 loopback.
bool HttpValidator::IsLocalhostIP(const std::string& host) {
    // IPv4 localhost
    std::array<int, 4> octets{};
    if (ParseDottedQuad(host, octets)) {
        return octets[0] == 127;
    }

    // IPv6 localhost
    const std::string addr = ToLowerCopy(StripBrackets(host));
    if (addr == "::1" || addr == "0:0:0:0:0:0:0:1") {
        return true;
    }

    std::array<unsigned char, 16> bytes{};
    if (!ParseIPv6Host(host, bytes)) {
        return false;
    }
    const bool first_15_zero =
        std::all_of(bytes.begin(), bytes.begin() + 15, [](unsigned char b) { return b == 0; });
    if (first_15_zero && bytes[15] == 1) return true;
    return IsIPv4Mapped(bytes) && bytes[12] == 127;
}

// True when `host` is a known cloud metadata endpoint.
bool HttpValidator::IsMetadataIP(const std::string& host) {
    // AWS/Azure/GCP metadata endpoint
    if (host == "169.254.169.254") {
        return true;
    }
    // GCP alternate
    if (host == "metadata.google.internal") {
        return true;
    }
    return false;
}

// True when `host` is an IP literal that must not be contacted.
// Non-IP hosts return false.
bool HttpValidator::IsBlockedIP(const std::string& host) {
    if (IsIPv4Address(host)) {
        return IsPrivateIPv4(host) || IsMetadataIP(host);
    }
    if (IsIPv6Address(host)) {
        return IsPrivateIPv6(host);
    }
    return false;
}

// True when restrictions are disabled, or when `host` equals an allowed
// domain or is a subdomain of one. Comparison is case-insensitive and
// ignores a single trailing root dot.
bool HttpValidator::IsDomainAllowed(const std::string& host) {
    if (!m_domain_restrictions_enabled) {
        return true;
    }

    const std::string lower_host = ToLowerCopy(StripTrailingDot(host));

    for (const auto& domain : m_allowed_domains) {
        const std::string lower_domain = ToLowerCopy(StripTrailingDot(domain));
        if (lower_domain.empty()) {
            continue;  // an empty entry must never match anything
        }

        // Exact match
        if (lower_host == lower_domain) {
            return true;
        }

        // Subdomain match (e.g., "api.example.com" matches "example.com").
        // The character before the suffix must be '.', so "evilexample.com"
        // does not match "example.com".
        if (lower_host.length() > lower_domain.length()) {
            const std::size_t pos = lower_host.length() - lower_domain.length();
            if (lower_host[pos - 1] == '.' &&
                lower_host.compare(pos, std::string::npos, lower_domain) == 0) {
                return true;
            }
        }
    }

    return false;
}

// True for "localhost", "localhost.localdomain" and any "*.localhost" name
// (case-insensitive, trailing root dot ignored).
bool HttpValidator::IsLocalhostName(const std::string& host) {
    const std::string lower = ToLowerCopy(StripTrailingDot(host));

    // Common localhost names
    if (lower == "localhost") return true;
    if (lower == "localhost.localdomain") return true;

    // Ends with .localhost
    if (lower.length() > 10 && lower.substr(lower.length() - 10) == ".localhost") {
        return true;
    }

    return false;
}

// True for well-known cloud metadata hostnames (case-insensitive, trailing
// root dot ignored).
bool HttpValidator::IsMetadataHostname(const std::string& host) {
    const std::string lower = ToLowerCopy(StripTrailingDot(host));

    // GCP metadata
    if (lower == "metadata.google.internal") return true;
    if (lower == "metadata") return true;

    // Azure metadata
    if (lower == "metadata.azure.internal") return true;

    return false;
}

// Splits `url` into scheme, host, port, path and query.
//
// Returns std::nullopt when the URL has no "://", an empty authority or host,
// userinfo ('@'), a backslash / whitespace / control character in the
// authority, '%' in a non-bracketed host, a malformed bracketed IPv6 literal,
// or a port that is not 1-5 decimal digits <= 65535.
//
// The host of a bracketed IPv6 literal keeps its brackets. `query` includes
// the leading '?'. When no port is given (or port 0 is given for a known
// scheme) the scheme default is used: 80 for http/ws, otherwise 443.
auto HttpValidator::ParseUrl(const std::string& url) -> std::optional<ParsedUrl> {
    ParsedUrl result;
    result.port = 0;
    result.is_ip_address = false;

    // Find scheme
    const std::size_t scheme_end = url.find("://");
    if (scheme_end == std::string::npos) {
        return std::nullopt;
    }
    result.scheme = ToLowerCopy(url.substr(0, scheme_end));

    // Start of authority
    const std::size_t auth_start = scheme_end + 3;
    if (auth_start >= url.length()) {
        return std::nullopt;
    }

    // The authority ends at the first '/', '?' or '#'. Stopping only at '/'
    // would fold a query or fragment into the host name.
    const std::size_t auth_end = url.find_first_of("/?#", auth_start);
    const std::string authority =
        (auth_end == std::string::npos) ? url.substr(auth_start)
                                        : url.substr(auth_start, auth_end - auth_start);
    if (authority.empty()) {
        return std::nullopt;
    }

    result.path = "/";
    if (auth_end != std::string::npos) {
        if (url[auth_end] == '/') {
            const std::size_t query_start = url.find('?', auth_end);
            if (query_start != std::string::npos) {
                result.path = url.substr(auth_end, query_start - auth_end);
                result.query = url.substr(query_start);
            } else {
                result.path = url.substr(auth_end);
            }
        } else if (url[auth_end] == '?') {
            result.query = url.substr(auth_end);
        }
        // A bare '#...' after the authority is a fragment: path stays "/".
    }

    // Reject characters that make different URL parsers disagree about which
    // host is being addressed (userinfo, backslash, whitespace, controls).
    for (unsigned char c : authority) {
        if (c <= 0x20 || c == 0x7f || c == '\\' || c == '@') {
            return std::nullopt;
        }
    }

    bool has_port = false;
    std::string port_text;

    if (authority.front() == '[') {
        // IPv6 literal in brackets
        const std::size_t bracket_end = authority.find(']');
        if (bracket_end == std::string::npos) {
            return std::nullopt;  // Malformed IPv6
        }
        result.host = authority.substr(0, bracket_end + 1);
        result.is_ip_address = true;

        // Only ":port" may follow the closing bracket.
        if (bracket_end + 1 < authority.length()) {
            if (authority[bracket_end + 1] != ':') {
                return std::nullopt;
            }
            has_port = true;
            port_text = authority.substr(bracket_end + 2);
        }
    } else {
        // Regular host or IPv4
        const std::size_t port_pos = authority.rfind(':');
        if (port_pos != std::string::npos) {
            result.host = authority.substr(0, port_pos);
            has_port = true;
            port_text = authority.substr(port_pos + 1);
        } else {
            result.host = authority;
        }

        // Percent-encoded host names could decode to a blocked name later.
        if (result.host.empty() || result.host.find('%') != std::string::npos) {
            return std::nullopt;
        }

        // Check if it's an IP address
        result.is_ip_address = IsIPv4Address(result.host) || IsIPv6Address(result.host);
    }

    if (has_port) {
        unsigned port_value = 0;
        if (!ParsePort(port_text, port_value)) {
            return std::nullopt;
        }
        result.port = static_cast<decltype(result.port)>(port_value);
    }

    // Default port based on scheme
    const bool is_plain = (result.scheme == "http" || result.scheme == "ws");
    const bool is_secure = (result.scheme == "https" || result.scheme == "wss");
    if (!has_port || (result.port == 0 && (is_plain || is_secure))) {
        result.port = is_plain ? 80 : 443;
    }

    return result;
}

}  // namespace mosis