329 lines
196 KiB
HTML
329 lines
196 KiB
HTML
<!doctype html>
|
|
|
|
<html lang="es">
|
|
<head>
|
|
<meta charset="utf-8" />
|
|
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
|
<title>LinTO STT — Cliente Web (micrófono → WS → subtítulos)</title>
|
|
<style>
|
|
:root { color-scheme: light dark; }
|
|
body { font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif; margin: 16px; }
|
|
.row { display:flex; gap: 12px; flex-wrap: wrap; align-items: center; }
|
|
input, select, button { font-size: 16px; padding: 10px 12px; border-radius: 10px; border: 1px solid #6664; }
|
|
button { cursor: pointer; }
|
|
button:disabled { opacity: .6; cursor: not-allowed; }
|
|
.card { border: 1px solid #6664; border-radius: 16px; padding: 14px; margin-top: 12px; }
|
|
.subtitle { font-size: 28px; line-height: 1.25; min-height: 2.5em; }
|
|
.muted { opacity: .75; }
|
|
pre { white-space: pre-wrap; word-wrap: break-word; }
|
|
.pill { display:inline-block; padding: 4px 10px; border-radius: 999px; border:1px solid #6664; font-size: 12px; }
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<h1>LinTO STT — Cliente Web</h1>
|
|
<p class="muted">Captura micrófono → re-muestrea a 16 kHz mono PCM 16-bit → envía por WebSocket → muestra subtítulos (parcial/final).</p> <div class="card">
|
|
<div class="row">
|
|
<label>
|
|
WebSocket URL
|
|
<input id="wsUrl" size="36" value="wss://livesst.thax.es/" />
|
|
</label>
|
|
<label>
|
|
Idioma (opcional)
|
|
<select id="lang">
|
|
<option value="">auto</option>
|
|
<option value="es">es</option>
|
|
<option value="en">en</option>
|
|
<option value="it">it</option>
|
|
<option value="fr">fr</option>
|
|
<option value="de">de</option>
|
|
</select>
|
|
</label>
|
|
<label>
|
|
Chunk (ms)
|
|
<select id="chunkMs">
|
|
<option value="250">250</option>
|
|
<option value="500" selected>500</option>
|
|
<option value="1000">1000</option>
|
|
</select>
|
|
</label>
|
|
</div><div class="row" style="margin-top:10px;">
|
|
<button id="btnStart">🎙️ Iniciar</button>
|
|
<button id="btnStop" disabled>⏹️ Parar</button>
|
|
<span id="status" class="pill">Desconectado</span>
|
|
<span class="muted">Tip: si tu servidor WS usa ruta <code>/ws</code>, ponla en la URL.</span>
|
|
</div>
|
|
|
|
</div> <div class="card">
|
|
<div class="subtitle" id="live">(subtítulo en vivo)</div>
|
|
<div class="muted" id="final" style="margin-top:10px;"></div>
|
|
</div> <div class="card">
|
|
<details>
|
|
<summary>Logs</summary>
|
|
<pre id="log"></pre>
|
|
</details>
|
|
</div><script>
|
|
// ---- UI utilities ----

// Shorthand for looking an element up by its id.
const $ = (id) => {
  return document.getElementById(id);
};

// Cached references to the elements this script writes to: the log <pre>,
// the status pill, the live subtitle line and the accumulated final text.
const [logEl, statusEl, liveEl, finalEl] = ['log', 'status', 'live', 'final'].map((id) => $(id));
|
|
|
|
/**
 * Appends one line to the log element, prefixed with the current UTC time
 * (HH:MM:SS taken from the ISO timestamp), then scrolls the log to the bottom.
 *
 * @param {*} msg - Value interpolated into the log line.
 */
function log(msg) {
  const stamp = new Date().toISOString().substring(11, 19);
  const line = `[${stamp}] ${msg}\n`;
  logEl.textContent = logEl.textContent + line;
  logEl.scrollTop = logEl.scrollHeight;
}
|
|
|
|
/**
 * Shows `text` in the connection-status pill.
 *
 * @param {string} text - Status label to display.
 */
function setStatus(text) {
  const pill = statusEl;
  pill.textContent = text;
}
|
|
|
|
// ---- Audio helpers ----

/**
 * Converts Float32 samples to 16-bit signed PCM, little-endian.
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000
 * and non-negative values by 0x7fff.
 *
 * @param {Float32Array} float32Array - Input samples.
 * @returns {ArrayBuffer} Buffer holding two bytes per input sample.
 */
function floatTo16BitPCM(float32Array) {
  const sampleCount = float32Array.length;
  const pcm = new DataView(new ArrayBuffer(sampleCount * 2));
  for (let n = 0, byteOffset = 0; n < sampleCount; n += 1, byteOffset += 2) {
    const clamped = Math.max(-1, Math.min(1, float32Array[n]));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    pcm.setInt16(byteOffset, clamped * scale, true); // true => little-endian
  }
  return pcm.buffer;
}
|
|
|
|
/**
 * Resamples `input` from `sampleRateIn` to 16000 Hz using plain linear
 * interpolation between neighbouring samples.
 *
 * @param {Float32Array} input - Samples at `sampleRateIn`.
 * @param {number} sampleRateIn - Sample rate of `input`, in Hz.
 * @returns {Float32Array} `input` itself when it is already at 16000 Hz,
 *   otherwise a new array of `Math.round(input.length / ratio)` samples.
 */
function resampleTo16k(input, sampleRateIn) {
  const TARGET_RATE = 16000;
  if (sampleRateIn === TARGET_RATE) {
    return input;
  }

  // How many input samples each output sample advances by.
  const step = sampleRateIn / TARGET_RATE;
  const lastIndex = input.length - 1;
  const resampled = new Float32Array(Math.round(input.length / step));

  let k = 0;
  while (k < resampled.length) {
    const srcPos = k * step;
    const lo = Math.floor(srcPos);
    // Clamp the right neighbour so the final sample never reads past the end.
    const hi = Math.min(lo + 1, lastIndex);
    const weight = srcPos - lo;
    resampled[k] = input[lo] * (1 - weight) + input[hi] * weight;
    k += 1;
  }
  return resampled;
}
|
|
|
|
// ---- Global state ----

// Live resources of the current session; each is null while nothing is running.
let ws = null;
let mediaStream = null;
let audioCtx = null;
let processor = null;

// Capture buffering.
let chunkBuffer = []; // Float32 chunks waiting to be merged and sent
let chunkSamplesTarget = 0; // samples (at the input rate) to gather before sending
let inputSampleRate = 48000; // replaced by the AudioContext's rate when capture starts
|
|
|
|
/**
 * Restores the capture-buffering globals to their initial values:
 * an empty chunk list, a zero sample target and a 48000 Hz input rate.
 */
function resetAudioState() {
  [chunkBuffer, chunkSamplesTarget, inputSampleRate] = [[], 0, 48000];
}
|
|
|
|
/**
 * Releases every resource of the current session: the ScriptProcessor node,
 * the AudioContext, the microphone tracks and the WebSocket. Afterwards the
 * corresponding globals are set to null and the buffering state is reset.
 *
 * Every step is best-effort: a failure while releasing one resource is logged
 * and does not prevent the remaining resources from being released. Calling
 * this when nothing is running is a no-op apart from the state reset.
 */
function closeAll() {
  // Runs one release step and reports (instead of silently swallowing) a failure.
  const attempt = (label, release) => {
    try {
      release();
    } catch (err) {
      log(`closeAll: ${label}: ${err?.message || err}`);
    }
  };

  if (processor) {
    attempt('processor', () => {
      // Detach the callback first so no further audio frames are processed.
      processor.onaudioprocess = null;
      processor.disconnect();
    });
  }

  if (audioCtx) {
    attempt('audioCtx', () => {
      const closing = audioCtx.close();
      // close() returns a Promise; a rejection would bypass try/catch and
      // surface as an unhandled rejection, so handle it explicitly.
      if (closing && typeof closing.catch === 'function') {
        closing.catch((err) => log(`closeAll: audioCtx: ${err?.message || err}`));
      }
    });
  }

  if (mediaStream) {
    attempt('mediaStream', () => {
      mediaStream.getTracks().forEach((track) => track.stop());
    });
  }

  if (ws) {
    attempt('ws', () => ws.close());
  }

  processor = null;
  audioCtx = null;
  mediaStream = null;
  ws = null;
  resetAudioState();
}
|
|
|
|
// ---- WebSocket handling ----

/**
 * Builds the WebSocket URL to connect to. When `lang` is non-empty it is set
 * as the `language` query parameter (replacing any existing one); the code
 * relies on a backend that does not use query parameters simply ignoring it.
 *
 * @param {string} baseUrl - Absolute URL; parsed with `new URL`, which throws
 *   a TypeError when it is not a valid absolute URL.
 * @param {string} lang - Language code, or an empty string for none.
 * @returns {string} The serialized URL.
 */
function buildWsUrl(baseUrl, lang) {
  const target = new URL(baseUrl);
  if (!lang) {
    return target.href;
  }
  target.searchParams.set('language', lang);
  return target.href;
}
|
|
|
|
/**
 * Opens a WebSocket to the URL typed in the `wsUrl` input (plus the selected
 * language as a query parameter), stores it in the global `ws`, and installs
 * its open/error/close/message handlers.
 *
 * Every handler is bound to the socket created by this call and ignores
 * events once that socket is no longer the current `ws`. Without this, the
 * delayed `close` event of a previous session (Stop followed by a quick
 * Start) would tear down the newly started session.
 *
 * Throws whatever `buildWsUrl` or the `WebSocket` constructor throw for an
 * invalid URL; in that case `ws` is left unchanged.
 */
function connectWs() {
  const baseUrl = $('wsUrl').value.trim();
  const lang = $('lang').value;
  const wsUrl = buildWsUrl(baseUrl, lang);

  log(`Conectando WS: ${wsUrl}`);
  const socket = new WebSocket(wsUrl);
  socket.binaryType = 'arraybuffer';
  ws = socket;

  // True while this socket is still the session's active connection.
  const isCurrent = () => ws === socket;

  socket.onopen = () => {
    if (!isCurrent()) return;
    setStatus('Conectado');
    log('WS abierto');

    // LinTO (streaming) usually requires a configuration message BEFORE any
    // audio is sent; without it the server may close the connection with
    // "Failed to load configuration".
    const cfg = {
      config: {
        sample_rate: 16000,
        // Harmless if the backend ignores `language`.
        ...(lang ? { language: lang } : {}),
      },
    };
    try {
      socket.send(JSON.stringify(cfg));
      log('Config enviada (sample_rate=16000)');
    } catch (e) {
      log('No se pudo enviar config: ' + (e?.message || e));
    }
  };

  socket.onerror = (e) => {
    log('WS error (mira consola también)');
    console.error(e);
  };

  socket.onclose = (e) => {
    const current = isCurrent();
    if (current) setStatus('Desconectado');
    log(`WS cerrado: code=${e.code} reason=${e.reason || '(sin reason)'}`);
    // A socket that was already replaced or released must not touch the UI
    // or the resources of whatever session is running now.
    if (!current) return;

    // If the socket closed unexpectedly while capture is running, shut audio down.
    if (!$('btnStart').disabled) return; // already stopped
    $('btnStop').disabled = true;
    $('btnStart').disabled = false;
    closeAll();
  };

  socket.onmessage = (evt) => {
    if (!isCurrent()) return;
    // Most servers reply with JSON.
    try {
      const msg = JSON.parse(evt.data);
      // Common field names: text, is_final/final, partial, etc.
      const text = msg.text ?? msg.partial ?? msg.transcript ?? '';
      const isFinal = msg.is_final ?? msg.final ?? false;

      if (text) {
        liveEl.textContent = text;
        if (isFinal) {
          finalEl.textContent += (finalEl.textContent ? ' ' : '') + text;
        }
      }
    } catch {
      // Not JSON (or not an object): log the raw payload, truncated.
      log(`Mensaje: ${String(evt.data).slice(0, 200)}`);
    }
  };
}
|
|
|
|
// ---- Microphone capture and sending ----

/**
 * Resolves once `socket` is open. Rejects if the socket closes first, if it
 * is already closing/closed, or if `timeoutMs` elapses. Listeners and the
 * timer are always removed when the promise settles.
 *
 * @param {WebSocket} socket - Socket to wait on.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
function waitForWsOpen(socket, timeoutMs) {
  return new Promise((resolve, reject) => {
    if (!socket) {
      reject(new Error('WebSocket no inicializado'));
      return;
    }
    if (socket.readyState === WebSocket.OPEN) {
      resolve();
      return;
    }
    // readyState 2 (CLOSING) / 3 (CLOSED): it will never open.
    if (socket.readyState >= 2) {
      reject(new Error('WS cerrado antes de abrir'));
      return;
    }

    let timer = null;
    const cleanup = () => {
      clearTimeout(timer);
      socket.removeEventListener('open', onOpen);
      socket.removeEventListener('close', onClose);
    };
    const onOpen = () => {
      cleanup();
      resolve();
    };
    const onClose = (e) => {
      cleanup();
      reject(new Error(`WS cerrado antes de abrir (code=${e?.code ?? 'n/a'})`));
    };

    timer = setTimeout(() => {
      cleanup();
      reject(new Error('Timeout esperando WS OPEN'));
    }, timeoutMs);
    socket.addEventListener('open', onOpen);
    socket.addEventListener('close', onClose);
  });
}

/**
 * Starts a capture session: asks for the microphone, opens the WebSocket,
 * waits (up to 8 s) for it to open, then wires a ScriptProcessor that
 * accumulates audio into chunks of the selected duration, resamples each
 * chunk to 16 kHz, converts it to 16-bit PCM and sends it over the socket.
 *
 * The Start button is disabled for the whole startup so a second click cannot
 * open a second microphone stream or socket. If any step fails, everything
 * acquired so far is released via `closeAll()`, the buttons are restored, the
 * error is logged and then rethrown to the caller.
 *
 * @returns {Promise<void>} Resolves once capture is running.
 */
async function start() {
  const startBtn = $('btnStart');
  const stopBtn = $('btnStop');

  finalEl.textContent = '';
  liveEl.textContent = '(escuchando...)';
  startBtn.disabled = true;

  try {
    // mediaDevices is undefined in insecure contexts; fail with a clear message.
    if (!navigator.mediaDevices?.getUserMedia) {
      throw new Error('getUserMedia no disponible (usa HTTPS o localhost)');
    }

    // 1) Ask for the microphone first (crucial on mobile; if the WS fails,
    //    you at least know the permission works).
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });

    // 2) Then open the WebSocket and wait for it to open (with a timeout).
    connectWs();
    await waitForWsOpen(ws, 8000);

    audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    inputSampleRate = audioCtx.sampleRate;
    log(`AudioContext sampleRate=${inputSampleRate}`);

    const source = audioCtx.createMediaStreamSource(mediaStream);

    // ScriptProcessor is legacy but works on mobile without extra setup.
    // A bufferSize of 4096 usually works well.
    processor = audioCtx.createScriptProcessor(4096, 1, 1);

    const chunkMs = parseInt($('chunkMs').value, 10);
    // Target chunk size, in samples at the input sample rate.
    chunkSamplesTarget = Math.round((inputSampleRate * chunkMs) / 1000);
    log(`Chunk target ~${chunkMs}ms => ${chunkSamplesTarget} samples @ ${inputSampleRate}Hz`);

    processor.onaudioprocess = (e) => {
      if (!ws || ws.readyState !== WebSocket.OPEN) return;

      const input = e.inputBuffer.getChannelData(0);
      // Copy, because the underlying buffer is reused between callbacks.
      chunkBuffer.push(new Float32Array(input));

      // Keep accumulating until chunkSamplesTarget samples are buffered.
      const total = chunkBuffer.reduce((acc, a) => acc + a.length, 0);
      if (total < chunkSamplesTarget) return;

      // Concatenate the buffered pieces into one array.
      const merged = new Float32Array(total);
      let offset = 0;
      for (const arr of chunkBuffer) {
        merged.set(arr, offset);
        offset += arr.length;
      }
      chunkBuffer = [];

      // Resample to 16 kHz and convert to PCM16.
      const resampled = resampleTo16k(merged, inputSampleRate);
      const pcm16 = floatTo16BitPCM(resampled);

      try {
        ws.send(pcm16);
      } catch (err) {
        log(`Error enviando audio: ${err.message}`);
      }
    };

    source.connect(processor);
    processor.connect(audioCtx.destination); // required in some browsers

    startBtn.disabled = true;
    stopBtn.disabled = false;

    log('🎙️ Captura iniciada');
  } catch (err) {
    log(err?.message || String(err));
    // Release whatever was acquired (mic tracks, socket, audio graph).
    closeAll();
    stopBtn.disabled = true;
    startBtn.disabled = false;
    throw err;
  }
}
|
|
|
|
/**
 * Stops the current session: logs the action, disables Stop and enables
 * Start, releases all resources through `closeAll()`, and updates the live
 * subtitle and the status pill.
 */
function stop() {
  log('⏹️ Parando...');

  const buttonStates = [
    ['btnStop', true],
    ['btnStart', false],
  ];
  for (const [id, disabled] of buttonStates) {
    $(id).disabled = disabled;
  }

  closeAll();
  liveEl.textContent = '(parado)';
  setStatus('Desconectado');
}
|
|
|
|
// ---- UI wiring ----

// Start button: run start(); on failure tell the user and put the controls
// back in the "disconnected" state.
$('btnStart').addEventListener('click', () =>
  start().catch((err) => {
    alert(`No se pudo iniciar: ${err.message}`);
    $('btnStop').disabled = true;
    $('btnStart').disabled = false;
    setStatus('Desconectado');
  })
);

// Stop button.
$('btnStop').addEventListener('click', () => {
  stop();
});

// Good practice: release everything when the page is being left.
window.addEventListener('beforeunload', () => {
  try {
    closeAll();
  } catch {
    // Best-effort: the page is going away anyway.
  }
});
|
|
</script></body>
|
|
</html> |