Initial commit: Vuelato - buscador de vuelos
Some checks failed
ci / ci (22, ubuntu-latest) (push) Has been cancelled

Nuxt 4 + Supabase + Flightics API. Incluye búsqueda de vuelos,
inspiraciones, watchlist, tracking de precios y mapa interactivo.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alejandro Martinez
2026-04-10 23:37:06 +02:00
commit b8906efc80
122 changed files with 37809 additions and 0 deletions

View File

@@ -0,0 +1,520 @@
/**
* Descubre URLs de reserva de aerolíneas usando Playwright.
*
* Estrategia por orden de prioridad:
* 1. Buscar links en el HTML con códigos IATA → extraer template de URL
* 2. Interceptar pushState/replaceState al interactuar con formulario
* 3. Capturar requests de red con parámetros de búsqueda
* 4. Fallback: guardar la booking page URL sin template
*
* Ejecutar: npx tsx scripts/discover-booking-urls.ts --offset 0 --limit 100
* Test: npx tsx scripts/discover-booking-urls.ts --iata KL,BA,FR
* 2nd pass: npx tsx scripts/discover-booking-urls.ts --retry-failed --offset 0 --limit 1020
*/
import { chromium, type Browser, type Page } from 'playwright'
import { createClient } from '@supabase/supabase-js'
import { parseArgs } from 'node:util'
// --- Config ---
// Supabase endpoint; falls back to a local instance when SUPABASE_URL is unset or empty.
const SUPABASE_URL = process.env.SUPABASE_URL || 'http://localhost:8000'
// Service-role key read from the environment; becomes an empty string when unset.
const SUPABASE_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY || ''
// Timeout in milliseconds used as the page default and for the first navigation to an airline site.
const SITE_TIMEOUT = 20_000
// Timeout in milliseconds used for the follow-up navigation to a discovered booking page.
const NAV_TIMEOUT = 15_000
// Well-known IATA airport codes used to detect search URL patterns in links.
// If any of these appear in an href, the link likely reveals the search URL format.
// Matching is done against upper-case three-letter words only, so lower-case codes in a URL
// are not detected by this set.
// NOTE(review): the last row looks like metropolitan-area codes (NYC, LON, PAR, ...) rather
// than single airports — confirm that consumers of the template accept those.
const KNOWN_IATA_CODES = new Set([
'AMS', 'LHR', 'CDG', 'FRA', 'MAD', 'BCN', 'FCO', 'MXP', 'IST', 'ATH',
'JFK', 'LAX', 'MIA', 'SFO', 'ORD', 'BOS', 'ATL', 'DFW', 'SEA', 'DEN',
'NRT', 'HND', 'ICN', 'PEK', 'PVG', 'HKG', 'SIN', 'BKK', 'DEL', 'BOM',
'DXB', 'DOH', 'CAI', 'JNB', 'NBO', 'ADD', 'CMN', 'ALG', 'LOS', 'ACC',
'GRU', 'EZE', 'BOG', 'LIM', 'SCL', 'MEX', 'CUN', 'PTY', 'SJO', 'HAV',
'SYD', 'MEL', 'AKL', 'NYC', 'LON', 'PAR', 'TYO', 'ROM', 'MIL',
])
// Substrings that hint an anchor's href points at a booking or flight-search page.
const BOOKING_HREF_PATTERNS = [
  'book',
  'booking',
  'flight',
  'search',
  'reserv',
  'vuelo',
  'fly',
  'ticket',
  'trip',
  'travel',
  'buy',
  'fare',
  'offer',
]

// Single comma-separated CSS selector: one case-insensitive `href` substring clause per keyword,
// in the same order as BOOKING_HREF_PATTERNS.
const BOOKING_HREF_SELECTOR = Array.from(
  BOOKING_HREF_PATTERNS,
  keyword => `a[href*="${keyword}" i]`,
).join(', ')
// Selectors for search form inputs
// Candidate CSS selectors for the origin field. They are tried in array order and the first
// one that resolves to a visible element is used, so more specific selectors should stay
// ahead of broader ones. Keywords cover English and Spanish attribute values, matched
// case-insensitively as substrings of name / placeholder / aria-label / id.
const ORIGIN_SELECTORS = [
'input[name*="origin" i]', 'input[name*="from" i]', 'input[name*="departure" i]',
'input[name*="depart" i]', 'input[name*="salida" i]', 'input[name*="origen" i]',
'input[placeholder*="from" i]', 'input[placeholder*="origin" i]',
'input[placeholder*="departure" i]', 'input[placeholder*="desde" i]',
'input[placeholder*="origen" i]', 'input[placeholder*="salida" i]',
'input[aria-label*="from" i]', 'input[aria-label*="origin" i]',
'input[aria-label*="departure" i]', 'input[aria-label*="desde" i]',
'input[id*="origin" i]', 'input[id*="from" i]', 'input[id*="depart" i]',
]
// Candidate CSS selectors for the destination field, tried in array order like the origin list.
// NOTE(review): the `*="to"` clauses are substring matches, so they also match any attribute
// value that merely contains "to" — confirm this breadth is intended.
const DEST_SELECTORS = [
'input[name*="destination" i]', 'input[name*="to" i]', 'input[name*="arrival" i]',
'input[name*="arriv" i]', 'input[name*="destino" i]', 'input[name*="llegada" i]',
'input[placeholder*="to" i]', 'input[placeholder*="destination" i]',
'input[placeholder*="arrival" i]', 'input[placeholder*="hacia" i]',
'input[placeholder*="destino" i]', 'input[placeholder*="llegada" i]',
'input[aria-label*="to" i]', 'input[aria-label*="destination" i]',
'input[aria-label*="arrival" i]', 'input[aria-label*="destino" i]',
'input[id*="destination" i]', 'input[id*="to" i]', 'input[id*="arriv" i]',
]
// Candidate selectors for the control that submits the search form, tried in array order;
// the first one that resolves to a visible element is clicked.
// The `:has-text(...)` entries are Playwright selector syntax, not standard CSS.
const SEARCH_BUTTON_SELECTORS = [
'button[type="submit"]',
'button:has-text("Search")', 'button:has-text("Buscar")',
'button:has-text("Book")', 'button:has-text("Find")',
'button:has-text("Reservar")', 'button:has-text("Buscar vuelos")',
'button:has-text("Search flights")', 'button:has-text("Find flights")',
'a:has-text("Search")', 'a:has-text("Buscar")',
'input[type="submit"]',
]
// --- Supabase ---
// Module-level client shared by the whole script, built from SUPABASE_URL and SUPABASE_KEY.
// It is created at import time, before any CLI argument is parsed.
const supabase = createClient(SUPABASE_URL, SUPABASE_KEY)
// Row shape read from the `airlines` table (only the columns this script selects).
interface Airline {
// Airline code; used as the key when the discovery result is written back.
iata: string
// Display name; only used in log output.
name: string
// Home page URL that discovery navigates to first.
website: string
}
// Outcome of running discovery for one airline.
interface DiscoveryResult {
// Code of the airline this result belongs to.
iata: string
// Concrete booking/search URL that was found, or null when nothing was found.
bookingUrl: string | null
// URL containing placeholders such as {origin} / {destination}, or null when none was derived.
bookingUrlTemplate: string | null
// How the template was discovered: 'link-template', 'booking-page-link-template',
// 'form-url-change', 'pushstate' or 'network-request'. Unset when no template was found.
method?: string // how the template was discovered
// Truncated error message when discovery threw.
error?: string
}
// --- Template extraction from links ---
/**
 * Scan the anchors on the page for a flight search/booking URL that embeds known IATA codes
 * and convert the first such link into a URL template.
 *
 * With two or more distinct codes, the first is treated as the origin and the second as the
 * destination; with a single code it is treated as the destination. A literal `1` that
 * directly follows `=` or `:` is replaced with `{passengers}`.
 *
 * @param page - Playwright page whose DOM is scanned.
 * @param baseUrl - URL used to resolve relative hrefs.
 * @returns The absolute template together with the absolute URL of the link it was derived
 *   from, or `null` when no link qualifies or the DOM query fails.
 */
async function extractTemplateFromLinks(page: Page, baseUrl: string): Promise<{ template: string; bookingUrl: string } | null> {
  let hrefs: string[]
  try {
    hrefs = await page.$$eval('a[href]', els => els.map(el => el.getAttribute('href') || ''))
  } catch {
    // DOM query failed (page closed, navigation in flight, ...): nothing to scan
    return null
  }

  for (const href of hrefs) {
    if (!href || href.length < 10 || href.length > 500) continue

    // Must look like a FLIGHT search/booking URL
    if (!/search|book|flight|reserv|offer|fare|vuelo|select/i.test(href)) continue

    // Exclude non-flight links (cars, hotels, guides, insurance, etc.)
    if (/car[s.]|hotel|guide|insurance|lounge|cargo|club|baggage|checkin|check-in|status|manage/i.test(href)) continue

    // A malformed percent-escape in one href must not abort the scan of the remaining links
    let decoded: string
    try {
      decoded = decodeURIComponent(href)
    } catch {
      continue
    }

    // Find known IATA airport codes in the URL
    const foundCodes = [...decoded.matchAll(/\b([A-Z]{3})\b/g)]
      .map(match => match[1])
      .filter((code): code is string => code !== undefined && KNOWN_IATA_CODES.has(code))

    // Need at least one real airport code
    const [firstCode, secondCode] = [...new Set(foundCodes)]
    if (firstCode === undefined) continue

    let template = href
    if (secondCode !== undefined) {
      // Two codes: first = origin, second = destination
      template = template.replace(new RegExp(`\\b${firstCode}\\b`), '{origin}')
      template = template.replace(new RegExp(`\\b${secondCode}\\b`), '{destination}')
      // Handle round-trip (origin repeated at end)
      template = template.replace(new RegExp(`\\b${firstCode}\\b`), '{origin}')
    } else {
      // Single code — likely a destination-only link from the homepage
      template = template.replace(new RegExp(`\\b${firstCode}\\b`, 'g'), '{destination}')
    }

    // Replace passenger counts in query params
    template = template.replace(/(?<=[=:])1(?=[&:,\s]|$)/g, '{passengers}')

    if (!template.includes('{destination}') && !template.includes('{origin}')) continue

    // The URL serializer percent-encodes `{` and `}` inside the path, which would turn
    // `{origin}` into `%7Borigin%7D`; restore the literal placeholders after resolving.
    const resolvedTemplate = resolveUrl(baseUrl, template)
      .replace(/%7B(origin|destination|passengers)%7D/gi, '{$1}')

    return {
      template: resolvedTemplate,
      bookingUrl: resolveUrl(baseUrl, href),
    }
  }

  return null
}
// --- Helper functions ---
/**
 * Resolve `href` against `base` and return the absolute URL as a string.
 * When the pair cannot be parsed as a URL, `href` is returned unchanged.
 */
function resolveUrl(base: string, href: string): string {
  let absolute: URL
  try {
    absolute = new URL(href, base)
  } catch {
    return href
  }
  return absolute.href
}
/**
 * Pick the href of the anchor most likely to lead to the booking page.
 *
 * Anchors matching BOOKING_HREF_SELECTOR are inspected in document order. The first visible
 * one whose href is not a career/about/press/blog link and whose text or href contains a
 * booking keyword wins. If none qualifies, the href of the first matched anchor is returned.
 * Any Playwright error yields `null`.
 */
async function findBookingLink(page: Page): Promise<string | null> {
  const excludedFragments = ['career', 'about', 'press', 'blog']
  const textHint = /book|reserv|search|buscar|vuelo|flight|fly|ticket|buy/i
  const hrefHint = /book|reserv|search|flight/i

  try {
    const candidates = await page.$$(BOOKING_HREF_SELECTOR)

    for (const anchor of candidates) {
      const href = await anchor.getAttribute('href')
      const label = ((await anchor.textContent()) ?? '').toLowerCase()
      const visible = await anchor.isVisible().catch(() => false)

      if (!href || !visible) continue
      if (excludedFragments.some(fragment => href.includes(fragment))) continue
      if (textHint.test(label) || hrefHint.test(href)) return href
    }

    // No anchor passed the keyword check: fall back to the first match, if any
    if (candidates.length > 0) {
      return await candidates[0].getAttribute('href')
    }
    return null
  } catch {
    return null
  }
}
/**
 * Type `value` into the first visible input matched by one of `selectors`.
 *
 * Selectors are tried in order. On the first visible match the element is clicked, filled,
 * given 500 ms to react, and sent an Enter key press (a failing key press is ignored).
 * A selector whose lookup or interaction throws is skipped.
 *
 * @returns `true` once an input was filled, `false` when no selector produced one.
 */
async function findAndFillInput(page: Page, selectors: string[], value: string): Promise<boolean> {
  for (const selector of selectors) {
    try {
      const input = await page.$(selector)
      if (!input) continue

      const visible = await input.isVisible().catch(() => false)
      if (!visible) continue

      await input.click()
      await input.fill(value)
      await page.waitForTimeout(500)
      await input.press('Enter').catch(() => {})
      return true
    } catch {
      // This selector did not work out; move on to the next candidate
    }
  }
  return false
}
/**
 * Click the first visible element matched by SEARCH_BUTTON_SELECTORS, trying them in order.
 * A selector whose lookup or click throws is skipped.
 *
 * @returns `true` when something was clicked, `false` otherwise.
 */
async function clickSearchButton(page: Page): Promise<boolean> {
  for (const selector of SEARCH_BUTTON_SELECTORS) {
    try {
      const button = await page.$(selector)
      if (!button) continue

      const visible = await button.isVisible().catch(() => false)
      if (!visible) continue

      await button.click()
      return true
    } catch {
      // This selector did not work out; move on to the next candidate
    }
  }
  return false
}
/** Escape a literal string so it can be embedded in a RegExp source. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
 * Replace airport codes in `text` with `{origin}` / `{destination}`.
 *
 * Pass 1 replaces exact-case occurrences anywhere. Pass 2 replaces occurrences in any case,
 * but only inside runs of codes that are not glued to other letters, so `mad-lhr` and
 * `madlhr` are rewritten while `madeira` or `nomad` are left alone.
 */
function substituteAirportCodes(text: string, origin: string, destination: string): string {
  const pairs: Array<[string, string]> = []
  if (origin !== '') pairs.push([origin, '{origin}'])
  if (destination !== '') pairs.push([destination, '{destination}'])
  if (pairs.length === 0) return text

  let out = text
  for (const [code, placeholder] of pairs) {
    out = out.split(code).join(placeholder)
  }

  const placeholderByCode = new Map<string, string>()
  for (const [code, placeholder] of pairs) {
    const key = code.toLowerCase()
    // First entry wins so identical origin/destination maps to {origin}
    if (!placeholderByCode.has(key)) placeholderByCode.set(key, placeholder)
  }
  const alternatives = pairs.map(([code]) => escapeRegExp(code)).join('|')
  const codeRun = new RegExp(`(?<![A-Za-z])(?:${alternatives})+(?![A-Za-z])`, 'gi')
  const singleCode = new RegExp(alternatives, 'gi')

  return out.replace(codeRun, run =>
    run.replace(singleCode, code => placeholderByCode.get(code.toLowerCase()) ?? code),
  )
}

/**
 * Turn a concrete search URL into a template by replacing the values that were searched for.
 *
 * Replaced values: the date (ISO, DD/MM/YYYY, compact YYYYMMDD and their URI-encoded forms)
 * becomes `{date}`; the origin/destination codes and the city names madrid / london /
 * londres / heathrow become `{origin}` / `{destination}`; a literal `1` directly after `=`
 * or `:` in the query string becomes `{passengers}`.
 *
 * The scheme and host are never rewritten, so a hostname that happens to contain a code or
 * city name stays intact. City names are replaced before codes so that `madrid` becomes
 * `{origin}` rather than `{origin}rid`.
 *
 * NOTE(review): the city names are hard-coded for the MAD → LHR probe used by the caller.
 *
 * @param url - URL observed after submitting the search form.
 * @param origin - Origin code that was typed into the form.
 * @param destination - Destination code that was typed into the form.
 * @param dateIso - Search date as YYYY-MM-DD.
 * @returns The URL with placeholders substituted.
 */
function buildTemplateFromUrl(url: string, origin: string, destination: string, dateIso: string): string {
  // Keep scheme + authority out of every substitution
  const hostSplit = /^([a-z][a-z0-9+.-]*:\/\/[^/?#]*)([\s\S]*)$/i.exec(url)
  const prefix = hostSplit?.[1] ?? ''
  let rest = hostSplit ? (hostSplit[2] ?? '') : url

  if (dateIso !== '') {
    const dateCompact = dateIso.replace(/-/g, '')
    const dateDMY = dateIso.split('-').reverse().join('/')
    const variants = [
      dateIso,
      dateDMY,
      dateCompact,
      encodeURIComponent(dateIso),
      encodeURIComponent(dateDMY),
    ]
    for (const variant of variants) {
      // An empty needle would inject the placeholder between every character
      if (variant === '') continue
      rest = rest.split(variant).join('{date}')
    }
  }

  rest = rest.replace(/madrid/gi, '{origin}')
  rest = rest.replace(/london|londres|heathrow/gi, '{destination}')

  rest = substituteAirportCodes(rest, origin, destination)

  // Passenger count: only the segment right after the first `?` is rewritten
  const segments = rest.split('?')
  const query = segments[1]
  if (query) {
    segments[1] = query.replace(/(?<=[=:])1(?=[&:,\s]|$)/g, '{passengers}')
    rest = segments.join('?')
  }

  return prefix + rest
}
// --- Main discovery function ---
/**
 * Try to discover the booking URL (and, ideally, a URL template) for one airline.
 *
 * Steps, in order:
 *  1. Open the airline website and look for a link that already reveals the search URL format.
 *  2. Follow the most likely booking link and repeat the link scan on that page.
 *  3. Fill the search form with a MAD → LHR probe, submit it, and inspect (a) the resulting
 *     page URL, (b) URLs passed to pushState/replaceState, (c) XHR/fetch request URLs.
 *  4. Otherwise keep whatever booking URL was found, without a template.
 *
 * A URL is only stored as a template when it contains `{origin}` or `{destination}`;
 * a URL without route placeholders is not a usable template and would also hide the airline
 * from later `--retry-failed` runs.
 *
 * The page is always closed before returning. Errors are reported through `result.error`
 * instead of being thrown.
 *
 * @param browser - Running Playwright browser used to open a fresh page.
 * @param airline - Airline row providing the IATA code and website.
 * @returns The discovery outcome; `bookingUrl` / `bookingUrlTemplate` stay `null` when unknown.
 */
async function discoverAirline(browser: Browser, airline: Airline): Promise<DiscoveryResult> {
  const result: DiscoveryResult = {
    iata: airline.iata,
    bookingUrl: null,
    bookingUrlTemplate: null,
  }

  // Same acceptance rule the link-based strategy applies to its templates
  const hasRoutePlaceholder = (candidate: string): boolean =>
    candidate.includes('{origin}') || candidate.includes('{destination}')

  let page: Page | null = null
  try {
    page = await browser.newPage({
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36',
      viewport: { width: 1280, height: 720 },
      locale: 'es-ES',
    })
    page.setDefaultTimeout(SITE_TIMEOUT)

    // Mask webdriver + capture pushState/replaceState.
    // This callback runs inside the browser, so it must stay self-contained.
    await page.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false })
      ;(window as any).__capturedUrls = []
      const origPush = history.pushState.bind(history)
      const origReplace = history.replaceState.bind(history)
      history.pushState = (...args: Parameters<History['pushState']>) => {
        // The URL argument may be a string or a URL object; store it as a string either way
        if (args[2] != null) (window as any).__capturedUrls.push(String(args[2]))
        return origPush(...args)
      }
      history.replaceState = (...args: Parameters<History['replaceState']>) => {
        if (args[2] != null) (window as any).__capturedUrls.push(String(args[2]))
        return origReplace(...args)
      }
    })

    // Capture search-related network requests
    const searchRequests: string[] = []
    page.on('request', req => {
      const resourceType = req.resourceType()
      if (resourceType !== 'xhr' && resourceType !== 'fetch') return
      const url = req.url()
      if (/search|book|flight|offer|avail|fare/i.test(url)) {
        searchRequests.push(url)
      }
    })

    // Step 1: Navigate to airline website
    await page.goto(airline.website, { waitUntil: 'domcontentloaded', timeout: SITE_TIMEOUT })
    await page.waitForTimeout(3000)
    const startUrl = page.url()

    // ========================================
    // Strategy 1: Extract template from links
    // ========================================
    const linkTemplate = await extractTemplateFromLinks(page, startUrl)
    if (linkTemplate) {
      result.bookingUrlTemplate = linkTemplate.template
      result.bookingUrl = linkTemplate.bookingUrl
      result.method = 'link-template'
      return result
    }

    // ========================================
    // Strategy 2: Find booking page + try form interaction
    // ========================================
    const bookingHref = await findBookingLink(page)
    if (bookingHref) {
      const bookingUrl = resolveUrl(startUrl, bookingHref)
      result.bookingUrl = bookingUrl
      try {
        await page.goto(bookingUrl, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT })
        await page.waitForTimeout(2000)
      } catch {
        // Navigation failed, but we have the URL
      }

      // Check links on booking page too
      const bookingPageTemplate = await extractTemplateFromLinks(page, page.url())
      if (bookingPageTemplate) {
        result.bookingUrlTemplate = bookingPageTemplate.template
        result.bookingUrl = bookingPageTemplate.bookingUrl
        result.method = 'booking-page-link-template'
        return result
      }
    } else {
      result.bookingUrl = startUrl
    }

    // ========================================
    // Strategy 3: Fill form + capture URL change / pushState
    // ========================================
    const testOrigin = 'MAD'
    const testDest = 'LHR'
    // Probe date 30 days ahead; only used to recognise the date inside resulting URLs
    const probeDay = new Date()
    probeDay.setDate(probeDay.getDate() + 30)
    const testDate = probeDay.toISOString().slice(0, 10)

    const filledOrigin = await findAndFillInput(page, ORIGIN_SELECTORS, testOrigin)
    const filledDest = await findAndFillInput(page, DEST_SELECTORS, testDest)
    if (!filledOrigin && !filledDest) return result

    const urlBefore = page.url()
    const clicked = await clickSearchButton(page)
    if (!clicked) return result

    // Wait for navigation or pushState
    await page.waitForTimeout(5000)

    // Check 3a: URL changed (traditional navigation)
    const urlAfter = page.url()
    if (urlAfter !== urlBefore) {
      const template = buildTemplateFromUrl(urlAfter, testOrigin, testDest, testDate)
      if (hasRoutePlaceholder(template)) {
        result.bookingUrlTemplate = template
        result.bookingUrl = urlAfter
        result.method = 'form-url-change'
        return result
      }
    }

    // Check 3b: pushState/replaceState captured
    const captured: unknown[] = await page.evaluate(() => (window as any).__capturedUrls || [])
    for (const entry of captured) {
      if (typeof entry !== 'string') continue
      const looksRelevant =
        entry.includes(testOrigin) || entry.includes(testDest) || /search|book|flight/i.test(entry)
      if (!looksRelevant) continue

      const fullUrl = resolveUrl(page.url(), entry)
      const template = buildTemplateFromUrl(fullUrl, testOrigin, testDest, testDate)
      if (!hasRoutePlaceholder(template)) continue

      result.bookingUrlTemplate = template
      result.bookingUrl = fullUrl
      result.method = 'pushstate'
      return result
    }

    // Check 3c: Network requests with search params
    for (const requestUrl of searchRequests) {
      if (!requestUrl.includes(testOrigin) && !requestUrl.includes(testDest)) continue

      const template = buildTemplateFromUrl(requestUrl, testOrigin, testDest, testDate)
      if (!hasRoutePlaceholder(template)) continue

      result.bookingUrlTemplate = template
      result.bookingUrl = requestUrl
      result.method = 'network-request'
      return result
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err)
    result.error = message.slice(0, 200)
  } finally {
    // Always release the page, even on early return or error
    await page?.close().catch(() => {})
  }
  return result
}
// --- Main ---
/**
 * CLI entry point.
 *
 * Options:
 *  --offset / --limit  window of airlines (ordered by IATA code) to process
 *  --iata KL,BA,...    process only these airlines; offset/limit are ignored
 *  --retry-failed      only process airlines that have no booking URL template yet
 *
 * Each airline is processed sequentially. Results are written back to the `airlines` table,
 * and an airline only counts as discovered when that write succeeds. A human-readable
 * summary and a one-line JSON summary are printed at the end.
 *
 * Exits with code 1 when the arguments are invalid or the airline list cannot be fetched,
 * and with code 0 when there is nothing to process.
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    options: {
      offset: { type: 'string', default: '0' },
      limit: { type: 'string', default: '100' },
      iata: { type: 'string' },
      'retry-failed': { type: 'boolean', default: false },
    },
  })

  const offset = Number.parseInt(values.offset ?? '0', 10)
  const limit = Number.parseInt(values.limit ?? '100', 10)
  const iataCodes = values.iata?.split(',').map(s => s.trim().toUpperCase())
  const retryFailed = values['retry-failed']

  // offset/limit only matter for the paginated query; reject values that would yield a bad range
  if (!iataCodes) {
    const validWindow = Number.isInteger(offset) && offset >= 0 && Number.isInteger(limit) && limit >= 1
    if (!validWindow) {
      console.error(`[discover] Invalid --offset/--limit: offset=${values.offset} limit=${values.limit}`)
      process.exit(1)
    }
  }

  if (iataCodes) {
    console.log(`[discover] Starting with specific airlines: ${iataCodes.join(', ')}`)
  } else if (retryFailed) {
    console.log(`[discover] Retrying airlines without template, offset=${offset} limit=${limit}`)
  } else {
    console.log(`[discover] Starting offset=${offset} limit=${limit}`)
  }

  // Fetch airlines from Supabase
  let query = supabase
    .from('airlines')
    .select('iata, name, website')
    .not('website', 'is', null)
    .order('iata')

  if (iataCodes) {
    query = query.in('iata', iataCodes)
  } else {
    if (retryFailed) {
      // Only process airlines that have no template yet
      query = query.is('booking_url_template', null)
    }
    query = query.range(offset, offset + limit - 1)
  }

  const { data: airlines, error } = await query
  if (error) {
    console.error('[discover] Failed to fetch airlines:', error.message)
    process.exit(1)
  }
  if (!airlines?.length) {
    console.log('[discover] No airlines to process')
    process.exit(0)
  }
  console.log(`[discover] Processing ${airlines.length} airlines`)

  const browser = await chromium.launch({
    headless: true,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-http2',
    ],
  })

  let discovered = 0
  let withTemplate = 0
  const failed: string[] = []

  try {
    for (let i = 0; i < airlines.length; i++) {
      // Rows come from an untyped client; the select list above matches the Airline shape
      const airline = airlines[i] as Airline
      const progress = `[${i + 1}/${airlines.length}]`
      try {
        const result = await discoverAirline(browser, airline)

        if (!result.bookingUrl && !result.bookingUrlTemplate) {
          failed.push(airline.iata)
          console.log(`${progress} ${airline.iata} (${airline.name}): SKIP${result.error ? ` (${result.error.slice(0, 80)})` : ''}`)
          continue
        }

        const update: Record<string, string> = {
          booking_url_discovered_at: new Date().toISOString(),
        }
        if (result.bookingUrl) update.booking_url = result.bookingUrl
        if (result.bookingUrlTemplate) update.booking_url_template = result.bookingUrlTemplate

        // The client reports failures through `error` instead of throwing, so check it
        const { error: updateError } = await supabase
          .from('airlines')
          .update(update)
          .eq('iata', airline.iata)
        if (updateError) {
          failed.push(airline.iata)
          console.log(`${progress} ${airline.iata} (${airline.name}): ERROR saving result: ${updateError.message.slice(0, 80)}`)
          continue
        }

        discovered++
        if (result.bookingUrlTemplate) withTemplate++
        const methodTag = result.method ? ` [${result.method}]` : ''
        console.log(`${progress} ${airline.iata} (${airline.name}): OK${result.bookingUrlTemplate ? ' +template' : ''}${methodTag} -> ${result.bookingUrl?.slice(0, 120)}`)
      } catch (err: unknown) {
        failed.push(airline.iata)
        const message = err instanceof Error ? err.message : String(err)
        console.log(`${progress} ${airline.iata} (${airline.name}): ERROR ${message.slice(0, 80)}`)
      }
    }
  } finally {
    // Never leave a headless browser behind, even if the loop itself fails
    await browser.close()
  }

  console.log(`\n[discover] Done: ${discovered} discovered, ${withTemplate} with template, ${failed.length} failed`)
  if (failed.length > 0) console.log(`[discover] Failed: ${failed.join(', ')}`)
  console.log(JSON.stringify({ discovered, withTemplate, failed: failed.length, failedCodes: failed }))
}
// Run the CLI. Any error that escapes main() is logged and the process exits with code 1.
main().catch(err => {
console.error('[discover] Fatal:', err)
process.exit(1)
})