MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 1: | Zeile 1: | ||
/* | /* LabelScan – Bildähnlichkeit statt OCR | ||
* Benötigt keine Server-Backends. Läuft komplett im Browser. | |||
* Erstellt einen lokalen Index (CLIP-Embeddings) aus allen Abfüllungsbildern in deinen Kategorien. | |||
* Autor: ADOS-Wiki Setup | |||
*/ | |||
/* global mw */ | |||
(() => { | |||
'use strict'; | |||
// ---------- KONFIG ---------- | |||
// Kategorien (genau so, wie sie im Wiki heißen) | |||
const | const CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | |||
'Alle A Dream of Ireland Abfüllungen', | |||
'Alle A Dream of... – Der Rest der Welt Abfüllungen', | |||
'Cigar Malt Übersicht', | |||
'Rumbastic Abfüllungen', | |||
'The Tasteful 8', | |||
'Còmhlan Abfüllungen', | |||
'Friendly Mr. Z Whiskytainment Abfüllungen', | |||
'Die Whisky Elfen Abfüllungen', | |||
'The Fine Art of Whisky Abfüllungen', | |||
'The Forbidden Kingdom', | |||
'Sonderabfüllungen' | |||
]; | |||
// Wie groß sollen die Thumbnails fürs Einbetten sein? | |||
const THUMB_SIZE = 512; | |||
function setStatus(t) { | // Wieviele Vorschläge anzeigen? | ||
const TOP_K = 8; | |||
// IndexedDB-Store (bei Strukturänderung die VERSION erhöhen) | |||
const IDB = { name: 'ados-labelscan', store: 'index', version: 1 }; | |||
// ---------- MINI UI HELPERS ---------- | |||
const $ = (sel) => document.querySelector(sel); | |||
function setStatus(t) { const el = $('#ados-scan-status'); if (el) el.textContent = t || ''; } | |||
function setProgress(p) { | |||
const bar = $('#ados-scan-progress'); | |||
if (!bar) return; | |||
if (p == null) { bar.hidden = true; bar.value = 0; } | |||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | |||
} | } | ||
function showPreview(file) { | |||
function | |||
const url = URL.createObjectURL(file); | const url = URL.createObjectURL(file); | ||
const box = $('#ados-scan-preview'); | |||
if (box) box.innerHTML = `<img alt="Vorschau" src="${url}" style="max-width:260px;border-radius:8px">`; | |||
} | |||
function renderResults(items) { | |||
const box = $('#ados-scan-results'); | |||
if (!box) return; | |||
box.innerHTML = ''; | |||
if (!items || !items.length) { | |||
box.innerHTML = '<div class="ados-hit">Keine Treffer gefunden.</div>'; | |||
return; | |||
} | |||
for (const it of items) { | |||
const url = mw.util.getUrl(it.title.replace(/ /g,'_')); | |||
const div = document.createElement('div'); | |||
div.className = 'ados-hit'; | |||
div.innerHTML = ` | |||
<a class="thumb" href="${url}"><img alt="" src="${it.thumb}" loading="lazy"></a> | |||
<div class="meta"> | |||
<b><a href="${url}">${mw.html.escape(it.title)}</a></b> | |||
<div class="sub">Ähnlichkeit: ${(it.score*100).toFixed(1)}%</div> | |||
</div>`; | |||
box.appendChild(div); | |||
} | |||
} | } | ||
// | // ---------- INDEXEDDB (sehr klein gehalten) ---------- | ||
function | |||
function idbOpen() { | |||
return new Promise((resolve, reject) => { | |||
const req = indexedDB.open(IDB.name, IDB.version); | |||
req.onupgradeneeded = (e) => { | |||
const db = req.result; | |||
if (e.oldVersion < 1) db.createObjectStore(IDB.store, { keyPath: 'key' }); | |||
}; | |||
req.onsuccess = () => resolve(req.result); | |||
req.onerror = () => reject(req.error); | |||
}); | |||
} | |||
async function idbGet(key) { | |||
const db = await idbOpen(); | |||
return new Promise((resolve, reject) => { | |||
const tx = db.transaction(IDB.store, 'readonly'); | |||
const st = tx.objectStore(IDB.store); | |||
const req = st.get(key); | |||
req.onsuccess = () => resolve(req.result ? req.result.val : null); | |||
req.onerror = () => reject(req.error); | |||
}); | |||
} | |||
async function idbSet(key, val) { | |||
const db = await idbOpen(); | |||
return new Promise((resolve, reject) => { | return new Promise((resolve, reject) => { | ||
const tx = db.transaction(IDB.store, 'readwrite'); | |||
const | const st = tx.objectStore(IDB.store); | ||
const req = st.put({ key, val, ts: Date.now() }); | |||
req.onsuccess = () => resolve(); | |||
req.onerror = () => reject(req.error); | |||
}); | }); | ||
} | } | ||
// ---------- TRANSFORMERS / CLIP ---------- | |||
return | let clipReady = null; | ||
async function ensureCLIP() { | |||
if (clipReady) return clipReady; | |||
clipReady = (async () => { | |||
}) | // Laden als ES-Module | ||
const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@3.0.0/dist/transformers.min.js'); | |||
// Image-Feature-Extraktion (CLIP) | |||
const extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32'); | |||
return { extractor }; | |||
})(); | |||
return clipReady; | |||
} | } | ||
// | // Normierung & Ähnlichkeit | ||
function | function l2norm(vec) { | ||
let s=0; for (let i=0;i<vec.length;i++) s += vec[i]*vec[i]; | |||
const k = 1/Math.sqrt(s||1); for (let i=0;i<vec.length;i++) vec[i]*=k; | |||
return vec; | |||
} | |||
function cosine(a, b) { | |||
let s=0; for (let i=0;i<a.length;i++) s += a[i]*b[i]; | |||
return Math.max(0, Math.min(1, (s+1)/2)); // hübscher 0..1 | |||
} | |||
// ---------- MEDIAWIKI API ---------- | |||
async function apiGet(params) { | |||
await mw.loader.using('mediawiki.api'); | |||
const api = new mw.Api(); | |||
return api.get(params); | |||
} | |||
async function pagesFromCategory(cat) { | |||
} | const pages = []; | ||
let cont = undefined; | |||
do { | |||
const res = await apiGet({ | |||
action: 'query', | |||
list: 'categorymembers', | |||
cmtitle: 'Category:' + cat, | |||
cmtype: 'page', | |||
cmlimit: 'max', | |||
...(cont || {}) | |||
}); | |||
for (const it of (res.query?.categorymembers || [])) { | |||
pages.push(it.title); | |||
} | |||
cont = res.continue; | |||
} while (cont); | |||
return pages; | |||
} | |||
async function | async function pageThumbs(titles) { | ||
const out = []; | |||
// In Batches abfragen | |||
const chunk = (arr, n) => arr.length ? [arr.slice(0,n), ...chunk(arr.slice(n), n)] : []; | |||
for (const batch of chunk(titles, 40)) { | |||
const res = await apiGet({ | |||
action: 'query', | |||
prop: 'pageimages', | |||
piprop: 'thumbnail', | |||
pithumbsize: THUMB_SIZE, | |||
titles: batch.join('|'), | |||
formatversion: 2 | |||
}); | |||
for (const p of (res.query?.pages || [])) { | |||
const th = p.thumbnail?.source; | |||
if (th) out.push({ title: p.title, thumb: th }); | |||
} | |||
} | |||
return out; | |||
} | |||
async function buildGallery() { | |||
// Alle Titles aus allen Kategorien | |||
const titlesSet = new Set(); | |||
for (const cat of CATEGORIES) { | |||
const list = await pagesFromCategory(cat); | |||
list.forEach(t => titlesSet.add(t)); | |||
} | |||
const titles = Array.from(titlesSet); | |||
const withThumbs = await pageThumbs(titles); | |||
return withThumbs; // [{title, thumb}] | |||
} | |||
// ---------- INDEX AUFBAUEN / LADEN ---------- | |||
async function ensureIndex(updateProgress) { | |||
// Versuche aus IDB | |||
let idx = await idbGet('index-v1'); | |||
if (idx && Array.isArray(idx.items) && idx.items.length) { | |||
updateProgress?.(1, 1, 'Index aus Cache'); | |||
return idx; | |||
} | |||
// Neu aufbauen | |||
updateProgress?.(0, 1, 'Lade Wiki-Bilder …'); | |||
const gallery = await buildGallery(); | |||
if (!gallery.length) return { items: [] }; | |||
const { extractor } = await ensureCLIP(); | |||
function | const items = []; | ||
if (! | for (let i = 0; i < gallery.length; i++) { | ||
const g = gallery[i]; | |||
try { | |||
updateProgress?.(i, gallery.length, `Embedding ${i+1}/${gallery.length}: ${g.title}`); | |||
const emb = await extractor(g.thumb, { pooling: 'mean', normalize: true }); // Float32Array | |||
// In normales Array konvertieren (IDB-kompatibel) | |||
items.push({ title: g.title, thumb: g.thumb, vec: Array.from(emb.data) }); | |||
} catch (e) { | |||
// Ignore einzelne Fehlschläge | |||
console.warn('[LabelScan] Embedding fail for', g.title, e); | |||
} | |||
} | |||
const index = { builtAt: Date.now(), items }; | |||
await idbSet('index-v1', index); | |||
updateProgress?.(1, 1, 'Index gespeichert'); | |||
return index; | |||
} | |||
// ---------- SUCHE ---------- | |||
async function runSearch(file) { | |||
setProgress(0); setStatus('Baue/ lade Bild-Index …'); | |||
const index = await ensureIndex((i, n, msg) => { | |||
setStatus(msg || 'Erstelle Index …'); setProgress(n ? i/n : null); | |||
}); | |||
if (!index.items.length) { | |||
renderResults([]); setProgress(null); | |||
setStatus('Kein Bildmaterial gefunden.'); | |||
return; | return; | ||
} | } | ||
setStatus('Berechne Embedding vom Foto …'); setProgress(0.05); | |||
). | const { extractor } = await ensureCLIP(); | ||
// Datei in DataURL umwandeln, damit @xenova/transformers sie laden kann | |||
const dataURL = await new Promise((res, rej) => { | |||
const r = new FileReader(); | |||
r.onload = () => res(r.result); | |||
r.onerror = rej; | |||
r.readAsDataURL(file); | |||
}); | |||
const q = await extractor(dataURL, { pooling: 'mean', normalize: true }); | |||
const qVec = q.data; // Float32Array, bereits normalisiert | |||
setStatus('Finde ähnlichste Abfüllungen …'); setProgress(0.15); | |||
// Scores | |||
const scored = index.items.map(it => ({ | |||
title: it.title, | |||
thumb: it.thumb, | |||
score: cosine(qVec, it.vec) | |||
})); | |||
scored.sort((a,b) => b.score - a.score); | |||
const top = scored.slice(0, TOP_K); | |||
renderResults(top); | |||
setProgress(null); setStatus('Fertig.'); | |||
} | } | ||
// ---------- BINDING ---------- | |||
function bind() { | |||
if (! | const runBtn = $('#ados-scan-run'); | ||
const fileIn = $('#ados-scan-file'); | |||
const bigBtn = $('#ados-scan-bigbtn'); | |||
if (!runBtn || !fileIn) return; | |||
if (runBtn.dataset.bound === '1') return; | |||
runBtn.dataset.bound = '1'; | |||
if (bigBtn) bigBtn.addEventListener('click', () => fileIn.click()); | |||
fileIn.addEventListener('change', function () { if (this.files && this.files[0]) showPreview(this.files[0]); }); | |||
runBtn.addEventListener('click', async (ev) => { | |||
ev.preventDefault(); | |||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | |||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | |||
try { await runSearch(fileIn.files[0]); } | |||
catch (e) { console.error(e); setStatus('Fehler. Bitte erneut versuchen.'); } | |||
finally { runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen'; } | |||
}); | |||
} | } | ||
// Automatisch binden | |||
}); | if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bind); | ||
else bind(); | |||
new MutationObserver(bind).observe(document.documentElement, { childList: true, subtree: true }); | |||
})(); | })(); | ||
Version vom 6. November 2025, 21:18 Uhr
/* LabelScan – Bildähnlichkeit statt OCR
* Benötigt keine Server-Backends. Läuft komplett im Browser.
* Erstellt einen lokalen Index (CLIP-Embeddings) aus allen Abfüllungsbildern in deinen Kategorien.
* Autor: ADOS-Wiki Setup
*/
/* global mw */
(() => {
'use strict';
// ---------- CONFIG ----------
// Categories whose member pages are indexed (spelled exactly as they are named in the wiki).
// Each entry is prefixed with 'Category:' when the category members are queried.
const CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Cigar Malt Übersicht',
'Rumbastic Abfüllungen',
'The Tasteful 8',
'Còmhlan Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'The Forbidden Kingdom',
'Sonderabfüllungen'
];
// Thumbnail size requested for the images that get embedded (sent as `pithumbsize`).
const THUMB_SIZE = 512;
// How many suggestions to display.
const TOP_K = 8;
// IndexedDB database/store names (bump the VERSION when the structure changes).
const IDB = { name: 'ados-labelscan', store: 'index', version: 1 };
// ---------- MINI UI HELPERS ----------
/**
 * Tiny selector helper: returns the first element in the document matching
 * the CSS selector, or null when nothing matches.
 */
const $ = (sel) => {
  const firstMatch = document.querySelector(sel);
  return firstMatch;
};
/**
 * Writes a status message into #ados-scan-status.
 * Falsy messages clear the text; a missing element is silently ignored.
 */
function setStatus(t) {
  const target = $('#ados-scan-status');
  if (!target) {
    return;
  }
  target.textContent = t ? t : '';
}
/**
 * Updates the #ados-scan-progress bar.
 * `null`/`undefined` hides the bar and resets it to 0; any other value shows
 * the bar and is clamped into the range 0..1.
 */
function setProgress(p) {
  const bar = $('#ados-scan-progress');
  if (!bar) return;
  const shouldHide = p == null;
  bar.hidden = shouldHide;
  bar.value = shouldHide ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview of the selected photo inside #ados-scan-preview.
 *
 * The image is built as a DOM node instead of an HTML string, and the
 * temporary object URL is released as soon as the browser has loaded (or
 * failed to load) the picture, so repeated selections do not leak blobs.
 *
 * @param {Blob} file - The photo picked by the user (a File from the input).
 */
function showPreview(file) {
  const box = $('#ados-scan-preview');
  // Without a target container there is nothing to show; do not allocate a URL.
  if (!box) return;

  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  // An already decoded image stays visible after its object URL is revoked.
  const release = () => URL.revokeObjectURL(url);
  img.addEventListener('load', release, { once: true });
  img.addEventListener('error', release, { once: true });
  img.setAttribute('alt', 'Vorschau');
  img.setAttribute('style', 'max-width:260px;border-radius:8px');
  img.setAttribute('src', url);
  // Replaces any previous preview in one step.
  box.replaceChildren(img);
}
/**
 * Renders the list of search hits into #ados-scan-results.
 *
 * Every value interpolated into the markup (page URL, thumbnail URL, title)
 * is HTML-escaped first, because the thumbnail URL and title come from the
 * API response or from the locally cached index rather than from this script.
 *
 * @param {Array<{title: string, thumb: string, score: number}>} items
 *   Hits to show, already sorted; `score` is expected in the range 0..1.
 *   An empty or missing list renders a "no hits" message.
 */
function renderResults(items) {
  const box = $('#ados-scan-results');
  if (!box) return;
  box.innerHTML = '';
  if (!items || !items.length) {
    box.innerHTML = '<div class="ados-hit">Keine Treffer gefunden.</div>';
    return;
  }
  for (const it of items) {
    // NOTE(review): mw.util requires the 'mediawiki.util' module; presumably
    // declared as a gadget dependency, verify in the gadget definition.
    const href = mw.html.escape(String(mw.util.getUrl(it.title.replace(/ /g, '_'))));
    const thumb = mw.html.escape(String(it.thumb));
    const title = mw.html.escape(it.title);
    const percent = (it.score * 100).toFixed(1);
    const div = document.createElement('div');
    div.className = 'ados-hit';
    div.innerHTML = `
<a class="thumb" href="${href}"><img alt="" src="${thumb}" loading="lazy"></a>
<div class="meta">
<b><a href="${href}">${title}</a></b>
<div class="sub">Ähnlichkeit: ${percent}%</div>
</div>`;
    box.appendChild(div);
  }
}
// ---------- INDEXEDDB (sehr klein gehalten) ----------
/**
 * Opens the gadget's IndexedDB database (name and version taken from `IDB`).
 * On first creation (old version below 1) the object store is created with
 * 'key' as its key path.
 *
 * @returns {Promise<IDBDatabase>} Resolves with the open database, rejects
 *   with the request's error.
 */
function idbOpen() {
  return new Promise((resolve, reject) => {
    const { name, store, version } = IDB;
    const request = indexedDB.open(name, version);
    request.onerror = () => {
      reject(request.error);
    };
    request.onsuccess = () => {
      resolve(request.result);
    };
    request.onupgradeneeded = (event) => {
      if (event.oldVersion < 1) {
        request.result.createObjectStore(store, { keyPath: 'key' });
      }
    };
  });
}
/**
 * Reads one cached value from the IndexedDB store.
 *
 * A fresh connection is opened per call (via idbOpen) and is now closed again
 * when the read has settled, so connections no longer pile up and cannot
 * block a later version upgrade of the database.
 *
 * @param {string} key - Record key.
 * @returns {Promise<*>} The stored `val`, or null when no record exists.
 *   Rejects with the request/transaction error.
 */
async function idbGet(key) {
  const db = await idbOpen();
  try {
    return await new Promise((resolve, reject) => {
      const tx = db.transaction(IDB.store, 'readonly');
      // An aborted transaction would otherwise leave the promise pending.
      tx.onabort = () => reject(tx.error);
      const req = tx.objectStore(IDB.store).get(key);
      req.onsuccess = () => resolve(req.result ? req.result.val : null);
      req.onerror = () => reject(req.error);
    });
  } finally {
    // close() waits for running transactions, so this is safe right away.
    db.close();
  }
}
/**
 * Stores a value in the IndexedDB store under `key`, together with a
 * timestamp (`ts`).
 *
 * The promise resolves only once the transaction has completed. A successful
 * put request alone does not mean the data was committed: the transaction can
 * still abort afterwards (for example when the storage quota is exceeded).
 * The connection opened for the write is closed afterwards.
 *
 * @param {string} key - Record key.
 * @param {*} val - Structured-cloneable value to store.
 * @returns {Promise<void>} Rejects with the request/transaction error.
 */
async function idbSet(key, val) {
  const db = await idbOpen();
  try {
    await new Promise((resolve, reject) => {
      const tx = db.transaction(IDB.store, 'readwrite');
      tx.oncomplete = () => resolve();
      tx.onabort = () => reject(tx.error);
      const req = tx.objectStore(IDB.store).put({ key, val, ts: Date.now() });
      req.onerror = () => reject(req.error);
    });
  } finally {
    // close() waits for running transactions, so this is safe right away.
    db.close();
  }
}
// ---------- TRANSFORMERS / CLIP ----------
// Shared promise of the loaded CLIP extractor; null while nothing is loading
// or after a failed attempt.
let clipReady = null;
/**
 * Lazily loads Transformers.js and the CLIP image-feature-extraction
 * pipeline. Concurrent and later callers share one load.
 *
 * A failed load (offline, CDN error) is not cached: the shared promise is
 * cleared so that the next call retries instead of failing forever.
 *
 * @returns {Promise<{extractor: Function}>} Object holding the pipeline.
 */
async function ensureCLIP() {
  if (clipReady) return clipReady;
  const loading = (async () => {
    // Loaded as an ES module. Transformers.js 3.x is published on npm as
    // @huggingface/transformers; the @xenova scope carries the 2.x line.
    // NOTE(review): confirm the pinned version against the npm registry.
    const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0/dist/transformers.min.js');
    // Image feature extraction (CLIP)
    const extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
    return { extractor };
  })();
  clipReady = loading;
  // Forget a failed attempt, but only if no newer attempt replaced it.
  loading.catch(() => {
    if (clipReady === loading) clipReady = null;
  });
  return loading;
}
// Normierung & Ähnlichkeit
/**
 * Scales a vector to unit Euclidean length, in place.
 * A zero vector is left unchanged (the squared sum falls back to 1).
 *
 * @param {number[]|Float32Array} vec - Vector to normalize; it is mutated.
 * @returns {number[]|Float32Array} The same `vec` instance.
 */
function l2norm(vec) {
  const size = vec.length;
  let sumOfSquares = 0;
  let i = 0;
  while (i < size) {
    sumOfSquares += vec[i] * vec[i];
    i += 1;
  }
  const scale = 1 / Math.sqrt(sumOfSquares || 1);
  let j = 0;
  while (j < size) {
    vec[j] *= scale;
    j += 1;
  }
  return vec;
}
/**
 * Cosine similarity of two vectors, mapped from -1..1 to a display-friendly
 * 0..1 range.
 *
 * The dot product is divided by both vector lengths, so the result is correct
 * whether or not the inputs were normalized beforehand. For unit vectors the
 * value is identical to the plain dot product.
 * NOTE(review): the image-feature-extraction pipeline documents only a `pool`
 * option, so the `normalize: true` passed by the callers may have no effect;
 * verify against the Transformers.js API reference.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in 0..1; 0 when the vectors have different
 *   lengths, are empty, have zero length, or contain non-finite values
 *   (never NaN, so the result is always safe to sort by).
 */
function cosine(a, b) {
  const n = a.length;
  if (n === 0 || b.length !== n) return 0;
  let dot = 0;
  let sumA = 0;
  let sumB = 0;
  for (let i = 0; i < n; i++) {
    dot += a[i] * b[i];
    sumA += a[i] * a[i];
    sumB += b[i] * b[i];
  }
  const denom = Math.sqrt(sumA) * Math.sqrt(sumB);
  const cos = dot / denom;
  // Covers zero vectors (0/0) and NaN/Infinity components.
  if (!Number.isFinite(cos)) return 0;
  return Math.max(0, Math.min(1, (cos + 1) / 2));
}
// ---------- MEDIAWIKI API ----------
/**
 * Sends a GET request to the MediaWiki action API.
 * Waits for the 'mediawiki.api' module first, then issues the request through
 * a new mw.Api instance.
 *
 * @param {Object} params - API query parameters.
 * @returns {Promise<Object>} Whatever mw.Api#get resolves with.
 */
async function apiGet(params) {
  await mw.loader.using('mediawiki.api');
  return new mw.Api().get(params);
}
/**
 * Collects the titles of all pages in one category, following the API's
 * continuation data until the listing is exhausted.
 *
 * @param {string} cat - Category name without the 'Category:' prefix.
 * @returns {Promise<string[]>} Page titles in the order the API returned them.
 */
async function pagesFromCategory(cat) {
  const titles = [];
  // Starts empty; afterwards holds the `continue` object of the last response.
  let continuation = {};
  while (continuation) {
    const response = await apiGet({
      action: 'query',
      list: 'categorymembers',
      cmtitle: 'Category:' + cat,
      cmtype: 'page',
      cmlimit: 'max',
      ...continuation
    });
    const members = response.query?.categorymembers || [];
    for (const member of members) {
      titles.push(member.title);
    }
    continuation = response.continue;
  }
  return titles;
}
/**
 * Looks up the page image thumbnail for each title, querying the API in
 * batches of 40 titles. Pages without a thumbnail are left out.
 *
 * @param {string[]} titles - Page titles to look up.
 * @returns {Promise<Array<{title: string, thumb: string}>>} One entry per
 *   page that has a thumbnail.
 */
async function pageThumbs(titles) {
  const BATCH_SIZE = 40;
  const found = [];
  for (let start = 0; start < titles.length; start += BATCH_SIZE) {
    const batch = titles.slice(start, start + BATCH_SIZE);
    const response = await apiGet({
      action: 'query',
      prop: 'pageimages',
      piprop: 'thumbnail',
      pithumbsize: THUMB_SIZE,
      titles: batch.join('|'),
      formatversion: 2
    });
    const pages = response.query?.pages || [];
    for (const page of pages) {
      const source = page.thumbnail?.source;
      if (source) {
        found.push({ title: page.title, thumb: source });
      }
    }
  }
  return found;
}
/**
 * Gathers the pages of all configured categories (each title only once) and
 * resolves their thumbnails. Categories are queried one after another.
 *
 * @returns {Promise<Array<{title: string, thumb: string}>>} Pages that have
 *   a thumbnail.
 */
async function buildGallery() {
  const uniqueTitles = new Set();
  for (const category of CATEGORIES) {
    const members = await pagesFromCategory(category);
    for (const title of members) {
      uniqueTitles.add(title);
    }
  }
  const gallery = await pageThumbs([...uniqueTitles]);
  return gallery;
}
// ---------- INDEX AUFBAUEN / LADEN ----------
/**
 * Returns the image index, from the IndexedDB cache when possible, otherwise
 * freshly built from the wiki's page thumbnails.
 *
 * Behaviour:
 * - A cached index younger than 7 days (judged by its `builtAt`) is returned
 *   as is. Older ones are rebuilt so that newly added pages become
 *   searchable; if that rebuild fails, the stale index is used instead.
 * - The cache is best effort: a failing read leads to a rebuild, and a
 *   failing write does not throw away the embeddings that were just computed.
 * - A single image that cannot be embedded is skipped with a warning.
 *
 * @param {function(number, number, string): void} [updateProgress]
 *   Optional callback receiving (done, total, message).
 * @returns {Promise<{builtAt?: number, items: Array<{title: string, thumb: string, vec: number[]}>}>}
 */
async function ensureIndex(updateProgress) {
  const CACHE_KEY = 'index-v1';
  const MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000;

  // Try the cache first; an unreadable cache must not block the search.
  let cached = null;
  try {
    cached = await idbGet(CACHE_KEY);
  } catch (e) {
    console.warn('[LabelScan] Index cache unreadable, rebuilding', e);
  }
  const usable = Boolean(cached && Array.isArray(cached.items) && cached.items.length);
  const age = usable ? Date.now() - (Number(cached.builtAt) || 0) : Infinity;
  if (usable && age < MAX_AGE_MS) {
    updateProgress?.(1, 1, 'Index aus Cache');
    return cached;
  }

  try {
    // Rebuild from scratch
    updateProgress?.(0, 1, 'Lade Wiki-Bilder …');
    const gallery = await buildGallery();
    if (!gallery.length) return { items: [] };
    const { extractor } = await ensureCLIP();
    const items = [];
    for (let i = 0; i < gallery.length; i++) {
      const g = gallery[i];
      try {
        updateProgress?.(i, gallery.length, `Embedding ${i+1}/${gallery.length}: ${g.title}`);
        // NOTE(review): the image-feature-extraction pipeline documents only a
        // `pool` option; confirm that these options have any effect.
        const emb = await extractor(g.thumb, { pooling: 'mean', normalize: true });
        // Convert to a plain array before storing it in IndexedDB
        items.push({ title: g.title, thumb: g.thumb, vec: Array.from(emb.data) });
      } catch (e) {
        // Individual failures are tolerated
        console.warn('[LabelScan] Embedding fail for', g.title, e);
      }
    }
    const index = { builtAt: Date.now(), items };
    let saved = false;
    if (items.length) {
      try {
        await idbSet(CACHE_KEY, index);
        saved = true;
      } catch (e) {
        console.warn('[LabelScan] Index could not be cached', e);
      }
    }
    updateProgress?.(1, 1, saved ? 'Index gespeichert' : 'Index erstellt (nicht gespeichert)');
    return index;
  } catch (e) {
    if (!usable) throw e;
    // Better an outdated index than no search at all.
    console.warn('[LabelScan] Rebuild failed, using stale index', e);
    updateProgress?.(1, 1, 'Index aus Cache');
    return cached;
  }
}
// ---------- SUCHE ----------
/**
 * Runs one search: makes sure the index exists, embeds the user's photo,
 * scores it against every indexed image and renders the TOP_K best hits.
 *
 * The progress bar is hidden again in every case, including when a step
 * throws. The error itself is passed on to the caller, which reports it.
 *
 * @param {Blob} file - The photo picked by the user (a File from the input).
 * @returns {Promise<void>}
 */
async function runSearch(file) {
  try {
    setProgress(0); setStatus('Baue/ lade Bild-Index …');
    const index = await ensureIndex((i, n, msg) => {
      setStatus(msg || 'Erstelle Index …'); setProgress(n ? i/n : null);
    });
    if (!index.items.length) {
      renderResults([]);
      setStatus('Kein Bildmaterial gefunden.');
      return;
    }
    setStatus('Berechne Embedding vom Foto …'); setProgress(0.05);
    const { extractor } = await ensureCLIP();
    // Convert the file to a data URL so that the extractor can load it
    const dataURL = await new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      // Reject with a real Error rather than the raw progress event.
      reader.onerror = () => reject(reader.error || new Error('FileReader failed'));
      reader.readAsDataURL(file);
    });
    // NOTE(review): the image-feature-extraction pipeline documents only a
    // `pool` option; confirm that these options have any effect.
    const q = await extractor(dataURL, { pooling: 'mean', normalize: true });
    const qVec = q.data;
    setStatus('Finde ähnlichste Abfüllungen …'); setProgress(0.15);
    // Score every indexed image against the photo
    const scored = index.items.map((it) => ({
      title: it.title,
      thumb: it.thumb,
      score: cosine(qVec, it.vec)
    }));
    scored.sort((a, b) => b.score - a.score);
    renderResults(scored.slice(0, TOP_K));
    setStatus('Fertig.');
  } finally {
    // Never leave the bar stuck mid-way, whatever happened above.
    setProgress(null);
  }
}
// ---------- BINDING ----------
/**
 * Wires up the scan UI once its elements exist.
 * Does nothing while the run button or the file input is missing, and marks
 * the run button (data-bound="1") so that repeated calls attach the
 * listeners only once.
 */
function bind() {
  const runButton = $('#ados-scan-run');
  const fileInput = $('#ados-scan-file');
  const pickerButton = $('#ados-scan-bigbtn');
  if (!runButton || !fileInput) return;
  if (runButton.dataset.bound === '1') return;
  runButton.dataset.bound = '1';

  // The optional big button simply opens the file picker.
  if (pickerButton) {
    pickerButton.addEventListener('click', () => {
      fileInput.click();
    });
  }

  // Show a preview as soon as a photo has been chosen.
  fileInput.addEventListener('change', function () {
    const picked = this.files && this.files[0];
    if (picked) showPreview(picked);
  });

  runButton.addEventListener('click', async (event) => {
    event.preventDefault();
    const photo = fileInput.files && fileInput.files[0];
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    runButton.disabled = true;
    runButton.textContent = 'Erkenne …';
    try {
      await runSearch(photo);
    } catch (e) {
      console.error(e);
      setStatus('Fehler. Bitte erneut versuchen.');
    } finally {
      // Re-enable the button whatever the outcome was.
      runButton.disabled = false;
      runButton.textContent = '🔍 Erkennen & suchen';
    }
  });
}
// Automatisch binden
// Bind right away when the DOM is already parsed, otherwise once it is ready.
if (document.readyState !== 'loading') {
  bind();
} else {
  document.addEventListener('DOMContentLoaded', bind);
}
// Call bind() again on every DOM change, so that scan elements inserted later
// are picked up as well (bind() itself ignores already bound elements).
const rebindObserver = new MutationObserver(bind);
rebindObserver.observe(document.documentElement, { childList: true, subtree: true });
})();