MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 1: | Zeile 1: | ||
/* LabelScan – Bildähnlichkeit | /* LabelScan – Bildähnlichkeit mit CLIP (UMD Loader, robust) | ||
Lädt @xenova/transformers (UMD) per <script>, baut einen lokalen Bildindex | |||
aus Abfüllungs-Thumbnails und sucht die ähnlichsten Seiten. | |||
*/ | |||
/* global mw */ | /* global mw */ | ||
(() => { | (() => { | ||
'use strict'; | 'use strict'; | ||
// | // ======== KONFIG ======== | ||
const CATEGORIES = [ | const CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | 'Alle A Dream of Scotland Abfüllungen', | ||
| Zeile 25: | Zeile 22: | ||
'Sonderabfüllungen' | 'Sonderabfüllungen' | ||
]; | ]; | ||
// Value sent as `pithumbsize` when page thumbnails are requested.
const THUMB_SIZE = 512;

// Number of best-scoring hits that are rendered.
const TOP_K = 8;

// IndexedDB database name, object store name and schema version.
const IDB = { name: 'ados-labelscan', store: 'index', version: 2 };

// Location of the transformers UMD bundle that is injected via a <script> tag.
const UMD_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@3.0.0/dist/transformers.umd.min.js';
// | // ======== UI Helpers ======== | ||
/** Shorthand for `document.querySelector`. */
const $ = (s) => document.querySelector(s);

/**
 * Writes a status message into `#ados-scan-status`.
 * Does nothing when the element is not on the page.
 * @param {string} [t] Message text; a falsy value clears the status line.
 */
function setStatus(t) {
  const el = $('#ados-scan-status');
  if (el) el.textContent = t || '';
}

/**
 * Updates `#ados-scan-progress`.
 * @param {?number} p Fraction, clamped to 0..1; `null`/`undefined` hides
 *   the bar and resets its value to 0.
 */
function setProgress(p) {
  const bar = $('#ados-scan-progress');
  if (!bar) return;
  if (p == null) {
    bar.hidden = true;
    bar.value = 0;
    return;
  }
  bar.hidden = false;
  bar.value = Math.max(0, Math.min(1, p));
}

// Object URL of the preview that is currently shown; kept so it can be
// released when the next preview replaces it.
let lastPreviewUrl = null;

/**
 * Renders a preview image for the chosen file inside `#ados-scan-preview`.
 * @param {Blob} file The selected image file.
 */
function showPreview(file) {
  const box = $('#ados-scan-preview');
  // Check for the container first so no object URL is allocated for nothing.
  if (!box) return;
  // Each createObjectURL() keeps the underlying blob alive until revoked.
  if (lastPreviewUrl) URL.revokeObjectURL(lastPreviewUrl);
  lastPreviewUrl = URL.createObjectURL(file);
  box.innerHTML = `<img alt="Vorschau" src="${lastPreviewUrl}" style="max-width:260px;border-radius:8px">`;
}
function renderResults(items){ | |||
const box = $('#ados-scan-results'); if(!box) return; box.innerHTML=''; | |||
if(!items || !items.length){ box.innerHTML='<div class="ados-hit">Keine Treffer gefunden.</div>'; return; } | |||
items.forEach(it=>{ | |||
function showPreview(file) { | |||
function renderResults(items) { | |||
const box = $('#ados-scan-results'); | |||
if (!items || !items.length) { | |||
const url = mw.util.getUrl(it.title.replace(/ /g,'_')); | const url = mw.util.getUrl(it.title.replace(/ /g,'_')); | ||
const div = document.createElement('div'); | const div = document.createElement('div'); | ||
| Zeile 64: | Zeile 42: | ||
div.innerHTML = ` | div.innerHTML = ` | ||
<a class="thumb" href="${url}"><img alt="" src="${it.thumb}" loading="lazy"></a> | <a class="thumb" href="${url}"><img alt="" src="${it.thumb}" loading="lazy"></a> | ||
<div class="meta"> | <div class="meta"><b><a href="${url}">${mw.html.escape(it.title)}</a></b> | ||
<div class="sub">Ähnlichkeit: ${(it.score*100).toFixed(1)}%</div></div>`; | |||
box.appendChild(div); | box.appendChild(div); | ||
} | }); | ||
} | } | ||
// | // ======== IndexedDB Mini ======== | ||
/**
 * Opens the gadget's IndexedDB database (`IDB.name` at `IDB.version`).
 *
 * During an upgrade the object store is created whenever it is missing,
 * regardless of which version the database is upgraded from, so a database
 * that exists without the store is repaired as well.
 *
 * @returns {Promise<IDBDatabase>} Resolves with the open connection and
 *   rejects with the request error if opening fails.
 */
function idbOpen() {
  return new Promise((res, rej) => {
    const req = indexedDB.open(IDB.name, IDB.version);
    req.onupgradeneeded = () => {
      const db = req.result;
      if (!db.objectStoreNames.contains(IDB.store)) {
        db.createObjectStore(IDB.store, { keyPath: 'key' });
      }
    };
    req.onsuccess = () => {
      const db = req.result;
      // Step aside when another tab wants to upgrade the schema; an open
      // connection would otherwise block that upgrade.
      db.onversionchange = () => db.close();
      res(db);
    };
    req.onerror = () => rej(req.error);
  });
}
/**
 * Runs a single request against the gadget's object store and closes the
 * connection afterwards.
 * @param {'readonly'|'readwrite'} mode Transaction mode.
 * @param {function(IDBObjectStore): IDBRequest} action Issues the request.
 * @returns {Promise<*>} The request result, once the transaction completed.
 */
async function idbWithStore(mode, action) {
  const db = await idbOpen();
  try {
    return await new Promise((res, rej) => {
      const tx = db.transaction(IDB.store, mode);
      const req = action(tx.objectStore(IDB.store));
      // Settle on transaction completion so that a write is committed
      // before the caller continues.
      tx.oncomplete = () => res(req.result);
      tx.onerror = () => rej(tx.error || req.error);
      tx.onabort = () => rej(tx.error || req.error);
    });
  } finally {
    // Every call opens its own connection; release it again.
    db.close();
  }
}

/**
 * Reads the value stored under `key`.
 * @param {string} key Record key.
 * @returns {Promise<*>} The stored `val`, or `null` when no record exists.
 */
async function idbGet(key) {
  const rec = await idbWithStore('readonly', (st) => st.get(key));
  return rec ? rec.val : null;
}

/**
 * Stores `val` under `key` as `{ key, val, ts }`, where `ts` is the write time.
 * @param {string} key Record key.
 * @param {*} val Value to persist.
 * @returns {Promise<void>}
 */
async function idbSet(key, val) {
  await idbWithStore('readwrite', (st) => st.put({ key, val, ts: Date.now() }));
}
// Pending or fulfilled load of the transformers bundle; null when no load is
// in flight, including after a failed attempt.
let clipReady = null;

/**
 * Loads the transformers UMD bundle once by injecting a <script> tag that
 * points at `UMD_URL`.
 *
 * Concurrent and later callers share the same promise. A failed load is not
 * cached: the next call injects the script again, so the user can retry
 * after a transient network/CDN error.
 *
 * @returns {Promise<object>} Resolves with `window.transformers`.
 *   Rejects when the script cannot be loaded or when it loads without
 *   providing `window.transformers.pipeline`.
 */
function ensureTransformersUMD() {
  if (clipReady) return clipReady;
  clipReady = new Promise((resolve, reject) => {
    if (window.transformers?.pipeline) {
      resolve(window.transformers);
      return;
    }
    const script = document.createElement('script');
    script.src = UMD_URL;
    script.async = true;
    script.onload = () => {
      // A script that loads without defining the expected global must not
      // resolve with `undefined`.
      if (window.transformers?.pipeline) {
        resolve(window.transformers);
      } else {
        reject(new Error('Transformers UMD geladen, aber window.transformers.pipeline fehlt'));
      }
    };
    script.onerror = () => reject(new Error('Transformers UMD konnte nicht geladen werden (CSP/CDN geblockt?)'));
    document.head.appendChild(script);
  }).catch((err) => {
    // Forget the failed attempt so a later call can try again.
    clipReady = null;
    throw err;
  });
  return clipReady;
}
// | // ======== Mathe ======== | ||
/**
 * Similarity score of two embedding vectors, mapped to the range 0..1.
 *
 * The vectors are expected to be normalised already, so their dot product
 * is the cosine similarity in -1..1; it is rescaled with (dot + 1) / 2 and
 * clamped.
 *
 * Vectors of different length cannot be compared (for example a cached
 * index produced with another embedding size). They score 0 instead of
 * NaN, because NaN scores would make the result sort order undefined.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} b Second vector.
 * @returns {number} Score between 0 and 1.
 */
function cosine(a, b) {
  if (a.length !== b.length) return 0;
  let dot = 0;
  for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
  if (Number.isNaN(dot)) return 0;
  return Math.max(0, Math.min(1, (dot + 1) / 2));
}
// | // ======== MW API ======== | ||
/**
 * Performs a GET request against the MediaWiki action API.
 * Waits for the `mediawiki.api` module before creating the client.
 * @param {object} p API parameters.
 * @returns {Promise<object>} The API response.
 */
async function apiGet(p) {
  await mw.loader.using('mediawiki.api');
  return new mw.Api().get(p);
}

/**
 * Collects the titles of all pages in a category, following API
 * continuation until the list is exhausted.
 * @param {string} cat Category name without the namespace prefix.
 * @returns {Promise<string[]>} Page titles in the order the API returned them.
 */
async function pagesFromCategory(cat) {
  const pages = [];
  // Continuation parameters of the previous response; empty for the first
  // request, falsy once the API reports no further results.
  let cont = {};
  while (cont) {
    const r = await apiGet({
      action: 'query',
      list: 'categorymembers',
      cmtitle: 'Category:' + cat,
      cmtype: 'page',
      cmlimit: 'max',
      ...cont
    });
    for (const member of r.query?.categorymembers || []) {
      pages.push(member.title);
    }
    cont = r.continue;
  }
  return pages;
}
/**
 * Looks up the page image thumbnail for each title.
 *
 * Titles are requested in batches of 40 per API call. The batches are cut
 * with a plain index loop, so the work stays linear in the number of titles
 * and does not recurse.
 *
 * @param {string[]} titles Page titles.
 * @returns {Promise<Array<{title: string, thumb: string}>>} One entry per
 *   page that has a thumbnail; pages without one are left out.
 */
async function pageThumbs(titles) {
  const BATCH_SIZE = 40;
  const out = [];
  for (let start = 0; start < titles.length; start += BATCH_SIZE) {
    const batch = titles.slice(start, start + BATCH_SIZE);
    const r = await apiGet({
      action: 'query',
      prop: 'pageimages',
      piprop: 'thumbnail',
      pithumbsize: THUMB_SIZE,
      titles: batch.join('|'),
      formatversion: 2
    });
    for (const p of r.query?.pages || []) {
      const th = p.thumbnail?.source;
      if (th) out.push({ title: p.title, thumb: th });
    }
  }
  return out;
}
/**
 * Builds the list of gallery images: gathers the pages of every configured
 * category, removes duplicate titles and resolves their thumbnails.
 * @returns {Promise<Array<{title: string, thumb: string}>>}
 */
async function buildGallery() {
  const unique = new Set();
  // Categories are queried one after another.
  for (const category of CATEGORIES) {
    const members = await pagesFromCategory(category);
    for (const title of members) {
      unique.add(title);
    }
  }
  return pageThumbs(Array.from(unique));
}
// | // ======== Index bauen/laden ======== | ||
/**
 * Returns the image index, building it first when no usable one is cached.
 *
 * A cached index (key `index-v2`) with at least one item is returned
 * directly. Otherwise the gallery is fetched, every thumbnail is embedded
 * with the CLIP image pipeline, and the result is stored under the same
 * key. Thumbnails whose embedding fails are logged and skipped.
 *
 * @param {function(number, number, string)=} report Optional progress
 *   callback, called as (done, total, message).
 * @returns {Promise<{builtAt?: number, items: Array<{title: string, thumb: string, vec: number[]}>}>}
 *   The index; `{ items: [] }` when the gallery is empty.
 */
async function ensureIndex(report) {
  const cached = await idbGet('index-v2');
  if (cached?.items?.length) {
    report?.(1, 1, 'Index aus Cache');
    return cached;
  }

  report?.(0, 1, 'Lade Wiki-Bilder …');
  const gallery = await buildGallery();
  if (!gallery.length) {
    return { items: [] };
  }

  setStatus('Lade CLIP-Modell … (einmalig)');
  const tf = await ensureTransformersUMD();
  // Best effort only: a failure to set the flag is ignored on purpose.
  try {
    tf.env.allowRemoteModels = true;
  } catch (e) { /* ignore */ }
  const pipe = await tf.pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');

  const total = gallery.length;
  const items = [];
  for (const [pos, entry] of gallery.entries()) {
    report?.(pos, total, `Embedding ${pos + 1}/${total}: ${entry.title}`);
    try {
      const emb = await pipe(entry.thumb, { pooling: 'mean', normalize: true });
      // Stored as a plain array of numbers.
      items.push({ title: entry.title, thumb: entry.thumb, vec: Array.from(emb.data) });
    } catch (e) {
      console.warn('[LabelScan] Embedding-Fehler', entry.title, e);
    }
  }

  const index = { builtAt: Date.now(), items };
  await idbSet('index-v2', index);
  report?.(1, 1, 'Index gespeichert');
  return index;
}
// | // ======== Suche ======== | ||
/**
 * Runs a complete search for one photo: makes sure the index exists, embeds
 * the photo, scores it against every index entry and renders the TOP_K best
 * hits. Status text and progress bar are updated along the way.
 * @param {Blob} file The photo chosen by the user.
 * @returns {Promise<void>}
 */
async function runSearch(file) {
  setProgress(0);
  setStatus('Baue/ lade Bild-Index …');
  const onIndexProgress = (i, n, msg) => {
    setStatus(msg || 'Index…');
    setProgress(n ? i / n : null);
  };
  const index = await ensureIndex(onIndexProgress);
  if (!index.items.length) {
    renderResults([]);
    setProgress(null);
    setStatus('Kein Bildmaterial gefunden.');
    return;
  }

  setStatus('Berechne Embedding vom Foto …');
  setProgress(0.1);
  const tf = await ensureTransformersUMD();
  const pipe = await tf.pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
  // The pipeline is given the photo as a data URL.
  const dataURL = await new Promise((res, rej) => {
    const reader = new FileReader();
    reader.onload = () => res(reader.result);
    reader.onerror = rej;
    reader.readAsDataURL(file);
  });
  const q = await pipe(dataURL, { pooling: 'mean', normalize: true });
  const qv = q.data;

  setStatus('Finde ähnlichste Abfüllungen …');
  setProgress(0.2);
  const ranked = index.items.map((it) => {
    return { title: it.title, thumb: it.thumb, score: cosine(qv, it.vec) };
  });
  ranked.sort((x, y) => y.score - x.score);
  renderResults(ranked.slice(0, TOP_K));

  setProgress(null);
  setStatus('Fertig.');
}
// ------ | // ======== Binding ======== | ||
/**
 * Wires the scan UI to its handlers.
 *
 * Returns without doing anything while the run button or the file input is
 * missing, or when the run button is already marked as bound
 * (`data-bound="1"`), so the function can be called repeatedly.
 */
function bind() {
  const runBtn = $('#ados-scan-run');
  const fileIn = $('#ados-scan-file');
  const bigBtn = $('#ados-scan-bigbtn');
  if (!runBtn || !fileIn) return;
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';

  // The optional big button only opens the file picker.
  if (bigBtn) {
    bigBtn.addEventListener('click', () => fileIn.click());
  }

  fileIn.addEventListener('change', () => {
    const chosen = fileIn.files && fileIn.files[0];
    if (chosen) showPreview(chosen);
  });

  runBtn.addEventListener('click', async (ev) => {
    ev.preventDefault();
    const photo = fileIn.files && fileIn.files[0];
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    // Disable the button while a search is running.
    runBtn.disabled = true;
    runBtn.textContent = 'Erkenne …';
    try {
      await runSearch(fileIn.files[0]);
    } catch (e) {
      console.error('[LabelScan] Fehler:', e);
      const hint = 'Fehler beim Laden/Verarbeiten. Prüfe Konsole. ' +
        'Häufige Ursachen: CDN/CSP blockiert oder Netzwerk.';
      setStatus(hint);
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  });
}
// Bind immediately when the DOM is already parsed, otherwise as soon as it is.
if (document.readyState !== 'loading') {
  bind();
} else {
  document.addEventListener('DOMContentLoaded', bind);
}

// Try again on every DOM change so that UI elements inserted later get bound
// too; bind() itself ignores elements it has already handled.
const rebindObserver = new MutationObserver(bind);
rebindObserver.observe(document.documentElement, { childList: true, subtree: true });
})(); | })(); | ||