Los sitios de reserva de viajes bloquean agresivamente las consultas automáticas de tarifas mediante CAPTCHA y detección de bots. CaptchaAI le permite monitorear los precios de vuelos y hoteles de manera confiable a pesar de estas defensas.
Panorama de CAPTCHA en sitios de viajes
| Categoría del sitio | Tipo de CAPTCHA | Dificultad |
|---|---|---|
| Aerolíneas (directas) | reCAPTCHA v3, Cloudflare | Medio |
| OTA (Expedia, Booking) | reCAPTCHA v2, Cloudflare Turnstile | Medio-alto |
| Metabúsqueda (Google Flights, Kayak) | reCAPTCHA v3 | Medio |
| Aerolíneas económicas | CAPTCHA de imagen, reCAPTCHA | Bajo-medio |
| Agregadores de hoteles | Cloudflare Challenge | Alto |
Implementación del Monitor de Tarifas
import requests
import time
import re
import json
import os
from datetime import datetime, timedelta
# CaptchaAI API key, read from the environment; a missing CAPTCHAAI_API_KEY
# variable raises KeyError as soon as this line runs.
API_KEY = os.environ["CAPTCHAAI_API_KEY"]
def solve_captcha(params):
    """Submit a CAPTCHA task to CaptchaAI and poll until it is solved.

    Args:
        params: Task parameters sent to ``in.php`` (for example ``method``,
            ``googlekey``/``sitekey`` and ``pageurl``). The API key is added
            to a copy, so the caller's dict is left untouched.

    Returns:
        The solution string that follows the ``OK|`` prefix in the
        ``res.php`` response.

    Raises:
        Exception: If the submission is rejected, or polling returns anything
            other than ``CAPCHA_NOT_READY`` or an ``OK|`` result.
        TimeoutError: If no solution arrives after 60 polls, 5 seconds apart.
    """
    max_polls = 60
    poll_interval = 5  # seconds between res.php polls
    http_timeout = 30  # seconds; keeps a stalled request from hanging the run

    # Build a new dict instead of writing the key into the caller's params.
    payload = {**params, "key": API_KEY}
    resp = requests.get(
        "https://ocr.captchaai.com/in.php",
        params=payload,
        timeout=http_timeout,
    )
    # Strip so a trailing newline cannot defeat the prefix/equality checks.
    submit_text = resp.text.strip()
    if not submit_text.startswith("OK|"):
        raise Exception(f"Submit: {resp.text}")
    task_id = submit_text.split("|")[1]

    for _ in range(max_polls):
        time.sleep(poll_interval)
        result = requests.get(
            "https://ocr.captchaai.com/res.php",
            params={"key": API_KEY, "action": "get", "id": task_id},
            timeout=http_timeout,
        )
        result_text = result.text.strip()
        if result_text == "CAPCHA_NOT_READY":
            continue
        if result_text.startswith("OK|"):
            # Split once: the solution itself may contain "|" characters.
            return result_text.split("|", 1)[1]
        raise Exception(f"Solve: {result.text}")

    raise TimeoutError(
        f"CAPTCHA task {task_id} not solved after "
        f"{max_polls * poll_interval} seconds"
    )
class FareMonitor:
    """Fetch travel pages, solve CAPTCHAs via ``solve_captcha`` and track fares.

    Every result produced by ``check_fares`` is appended to ``self.history``,
    which ``detect_price_changes`` and ``export_report`` read.
    """

    # Seconds to wait on each HTTP request so a stalled site cannot hang a run.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the HTTP session with a browser-like User-Agent."""
        self.session = requests.Session()
        self.session.headers["User-Agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 Chrome/120.0.0.0"
        )
        # One dict per fare check, in the order the checks were made.
        self.history = []

    def fetch_with_captcha(self, url):
        """Fetch a travel page, solving CAPTCHAs if encountered.

        Args:
            url: Page to request with the shared session.

        Returns:
            The text of the last response: the original page, or the response
            to the POST that submitted a solved CAPTCHA token.
        """
        resp = self.session.get(url, timeout=self.REQUEST_TIMEOUT)

        # reCAPTCHA v2/v3. Turnstile widgets also carry a data-sitekey
        # attribute, so only treat the key as reCAPTCHA when the page has no
        # Turnstile marker; otherwise a Turnstile key would be solved as v2.
        if "cf-turnstile" not in resp.text:
            match = re.search(
                r'data-sitekey=["\']([A-Za-z0-9_-]+)["\']', resp.text
            )
            if match:
                task = {
                    "method": "userrecaptcha",
                    "googlekey": match.group(1),
                    "pageurl": url,
                }
                # The render= query parameter on api.js marks a v3 page.
                if "recaptcha/api.js?render=" in resp.text:
                    task["version"] = "v3"
                    task["action"] = "search"
                token = solve_captcha(task)
                resp = self.session.post(
                    url,
                    data={"g-recaptcha-response": token},
                    timeout=self.REQUEST_TIMEOUT,
                )

        # Cloudflare Turnstile (checked on the latest response, so a Turnstile
        # challenge served after a reCAPTCHA submission is still handled).
        if "cf-turnstile" in resp.text:
            match = re.search(
                r'data-sitekey=["\']([^"\']+)', resp.text
            )
            if match:
                token = solve_captcha({
                    "method": "turnstile",
                    "sitekey": match.group(1),
                    "pageurl": url,
                })
                resp = self.session.post(
                    url,
                    data={"cf-turnstile-response": token},
                    timeout=self.REQUEST_TIMEOUT,
                )

        return resp.text

    def check_fares(self, routes):
        """Check fares for a list of routes.

        Args:
            routes: Iterable of dicts with ``origin``, ``destination``,
                ``date`` and ``url`` keys.

        Returns:
            A list with one result dict per route that was fetched
            successfully. Routes that raise are reported on stdout and
            skipped. Each result is also appended to ``self.history``.
        """
        results = []
        for route in routes:
            try:
                html = self.fetch_with_captcha(route["url"])
                prices = self._extract_prices(html)
                result = {
                    "route": f"{route['origin']}-{route['destination']}",
                    "date": route["date"],
                    "prices": prices,
                    "min_price": min(prices) if prices else None,
                    "timestamp": datetime.utcnow().isoformat(),
                }
                results.append(result)
                self.history.append(result)

                if prices:
                    print(f" {result['route']} ({route['date']}): "
                          f"${min(prices)}-${max(prices)}")
                else:
                    print(f" {result['route']}: No prices found")

                time.sleep(3)  # Respectful delay
            except Exception as e:
                # Best effort: one failing route must not abort the others.
                print(f" {route.get('origin', '?')}-"
                      f"{route.get('destination', '?')}: ERROR - {e}")
        return results

    def _extract_prices(self, html):
        """Extract prices from travel page HTML.

        Args:
            html: Page text to scan for dollar amounts.

        Returns:
            A sorted list of the distinct amounts strictly between 20 and
            10000, as floats.
        """
        prices = set()
        # The amount must start with a digit: with a bare ``[\d,]+`` a stray
        # "$," matched only commas and float("") raised ValueError.
        for match in re.finditer(
            r'\$\s*(\d[\d,]*(?:\.\d{2})?)', html
        ):
            price = float(match.group(1).replace(",", ""))
            if 20 < price < 10000:  # Filter noise
                prices.add(price)
        return sorted(prices)

    def detect_price_changes(self, threshold_pct=5):
        """Detect significant price drops in history.

        Compares the last two recorded minimum prices for each route/date
        pair. Only decreases are reported; increases never produce an alert.

        Args:
            threshold_pct: Minimum drop, in percent, needed for an alert.

        Returns:
            A list of dicts with ``route`` (``"<route>_<date>"``),
            ``previous``, ``current`` and ``change`` (formatted percentage).
        """
        route_prices = {}
        for entry in self.history:
            key = f"{entry['route']}_{entry['date']}"
            series = route_prices.setdefault(key, [])
            # Truthiness skips None and also 0, which would otherwise end up
            # as a zero divisor below.
            if entry["min_price"]:
                series.append(entry["min_price"])

        alerts = []
        for key, prices in route_prices.items():
            if len(prices) < 2:
                continue
            prev, current = prices[-2], prices[-1]
            change_pct = ((current - prev) / prev) * 100
            if change_pct < -threshold_pct:
                alerts.append({
                    "route": key,
                    "previous": prev,
                    "current": current,
                    "change": f"{change_pct:.1f}%",
                })
        return alerts

    def export_report(self, filename="fare_report.json"):
        """Export fare history to JSON.

        Args:
            filename: Path of the file to write; it is overwritten.
        """
        # Explicit encoding so the output does not depend on the OS locale.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.history, f, indent=2)
        print(f"Exported {len(self.history)} fare checks to {filename}")
# Define routes to monitor: one dict per (origin, destination, date) triple,
# with the search URL derived from the same three values.
routes = [
    {
        "origin": origin,
        "destination": destination,
        "date": date,
        "url": (
            "https://example-airline.com/flights"
            f"?from={origin}&to={destination}&date={date}"
        ),
    }
    for origin, destination, date in (
        ("JFK", "LAX", "2025-03-15"),
        ("SFO", "ORD", "2025-03-20"),
    )
]

# Run one monitoring pass and write the collected history to disk.
monitor = FareMonitor()
results = monitor.check_fares(routes)
monitor.export_report()
Programación
# Check fares every 4 hours
0 */4 * * * cd /opt/fare-monitor && python fare_monitor.py
Análisis de costos
| Nivel de monitoreo | Rutas | Verificaciones/día | CAPTCHAs/día | Costo est. |
|---|---|---|---|---|
| Personal | 5 | 6/ruta | ~30 | $0.50 |
| Agencia pequeña | 50 | 4/route | ~200 | $2-5 |
| Empresa | 500 | 6/route | ~3000 | $20-40 |
Preguntas frecuentes
¿Con qué frecuencia debo consultar las tarifas?
Cada 4-6 horas para uso personal. Cada 1-2 horas para uso comercial. Las aerolíneas actualizan los precios en lotes, por lo que controles más frecuentes producen rendimientos decrecientes.
¿Puedo controlar los precios de los hoteles también?
Sí. El mismo enfoque funciona para Booking.com, Expedia y sitios directos de hoteles. Ajuste los patrones de extracción de precios para los formatos de páginas de hoteles.
¿Cómo manejo las páginas de precios dinámicas?
Algunos sitios de viajes requieren renderizado de JavaScript. Utilice Selenium o Playwright para cargar la página y luego CaptchaAI para resolver los CAPTCHA.
Guías relacionadas
- Monitoreo de precios de comercio electrónico
- Scraping sin bloqueos
- Rotación de proxies para scraping con CAPTCHA