MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 11: | Zeile 11: | ||
maxSide: 1024, | maxSide: 1024, | ||
debug: true, | debug: true, | ||
// fürs Debug: Modell beim Klick einmal „warmladen“ | |||
forceModelWarmup: true | forceModelWarmup: true | ||
}; | }; | ||
| Zeile 67: | Zeile 68: | ||
} | } | ||
// ------------------------- CLIP / Transformers ------------------------- | // ------------------------- CLIP / Transformers (vision-only) ------------------------- | ||
let | let _visionLoadPromise=null; | ||
async function | async function ensureClipVision(){ | ||
if( | if(_visionLoadPromise) return _visionLoadPromise; | ||
setStatus('Modell laden …'); setProgress(0.08); | setStatus('Modell laden …'); setProgress(0.08); | ||
_visionLoadPromise = (async()=>{ | |||
try{ | try{ | ||
const mod = await import(/* webpackIgnore: true */ CFG.transformersURL); | const mod = await import(/* webpackIgnore: true */ CFG.transformersURL); | ||
// | // Runtime-Umgebung | ||
mod.env.remoteModels = true; | mod.env.remoteModels = true; | ||
mod.env.allowRemoteModels = true; | mod.env.allowRemoteModels = true; | ||
mod.env.useBrowserCache = true; | mod.env.useBrowserCache = true; | ||
// | // WASM-Pfade: nutze das Transformers-CDN (enthält ort-wasm-simd.wasm) | ||
mod.env.backends = mod.env.backends || {}; | mod.env.backends = mod.env.backends || {}; | ||
mod.env.backends.onnx = mod.env.backends.onnx || {}; | mod.env.backends.onnx = mod.env.backends.onnx || {}; | ||
| Zeile 91: | Zeile 90: | ||
mod.env.backends.onnx.wasm.wasmPaths = | mod.env.backends.onnx.wasm.wasmPaths = | ||
'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/'; | 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/'; | ||
// Threads konservativ | |||
mod.env.backends.onnx.wasm.numThreads = 1; | |||
mod.env.backends.onnx.wasm.simd = true; | |||
// | // (optional) WebGPU probieren | ||
try { | try { | ||
if ('gpu' in navigator) { | if ('gpu' in navigator) { | ||
| Zeile 100: | Zeile 102: | ||
} catch(_) {} | } catch(_) {} | ||
// | // ⬇️ Statt pipeline: explizit Vision-Modell + Processor laden | ||
const | const processor = await mod.AutoProcessor.from_pretrained(CFG.modelId); | ||
// Wichtig: den VISION-Zweig laden, nicht den Text-Zweig | |||
const model = await mod.CLIPVisionModelWithProjection.from_pretrained( | |||
CFG.modelId, | CFG.modelId, | ||
{ quantized:true } | { quantized: true } | ||
); | ); | ||
// | // kleines Warmup (1x 32x32 „Bild“) – optional | ||
try{ | |||
const dummy = new ImageData(32,32); | |||
const inputs = await processor(dummy, { return_tensors: 'pt' }); | |||
await model.forward({ pixel_values: inputs.pixel_values }); | |||
} catch(_) {} | |||
return { mod, | let backend='unknown'; | ||
try { backend = model?.session?.executionProvider || backend; } catch(_){} | |||
log('CLIP ready (vision):', model?.constructor?.name || 'unknown', '| Backend:', backend); | |||
return { mod, processor, model }; | |||
} catch(e){ | } catch(e){ | ||
err('CLIP load failed:', e); | err('CLIP load failed:', e); | ||
| Zeile 119: | Zeile 128: | ||
})(); | })(); | ||
return | return _visionLoadPromise; | ||
} | } | ||
| Zeile 145: | Zeile 154: | ||
} | } | ||
const { | const { processor, model } = await ensureClipVision(); | ||
setStatus('Bild vorbereiten …'); setProgress(0.20); | setStatus('Bild vorbereiten …'); setProgress(0.20); | ||
| Zeile 153: | Zeile 162: | ||
setStatus('Bild analysieren …'); setProgress(0.38); | setStatus('Bild analysieren …'); setProgress(0.38); | ||
const | // Processor wandelt Canvas → pixel_values Tensor | ||
const | const inputs = await processor(canvas, { return_tensors: 'pt' }); | ||
const out = await model.forward({ pixel_values: inputs.pixel_values }); | |||
// Embedding: image_embeds (Float32Array auf .data) | |||
const vec = out?.image_embeds?.data || out?.image_embeds; | |||
if (!(vec instanceof Float32Array)) { | |||
throw new Error('Embedding-Format unerwartet (kein Float32Array).'); | |||
} | |||
return normalize(vec); | return normalize(vec); | ||
} | } | ||
function normalize(v){ | function normalize(v){ | ||
let n=0; for(let i=0;i<v.length;i++) n+=v[i]*v[i]; | let n=0; for(let i=0;i<v.length;i++) n+=v[i]*v[i]; | ||
| Zeile 269: | Zeile 267: | ||
await loadIndex({ ui:true }); | await loadIndex({ ui:true }); | ||
if (CFG.forceModelWarmup) await | |||
if (CFG.forceModelWarmup) { | |||
await ensureClipVision(); | |||
} | |||
if(!INDEX_EMB.some(v=>v&&v.length)){ | if(!INDEX_EMB.some(v=>v&&v.length)){ | ||