MediaWiki:Gadget-LabelScan.js
Erscheinungsbild
Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
- Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
- Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
- Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
/* global mw, Tesseract */
(function () {
'use strict';
// =============================
// KONFIGURATION
// =============================
// Wiki categories that category-restricted searches are limited to
// (turned into an `incategory:` filter by incatStr()).
// For testing, leave this empty: const ADOS_CATEGORIES = [];
const ADOS_CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'Alle Rumbastic Abfüllungen'
];
// Phrases that extractHints() looks for in the OCR text (case-insensitive,
// whole-word match). Every phrase found becomes a "name" hint for the search.
const KNOWN_TOKENS = [
// Distillery / origin / regions
'Ireland','Irland','Irish','Single Malt','Bourbon Barrel',
'Cask Strength','1st Fill','First Fill',
'Aged','Years','Yo',
// ADOS series / motif series
'A Dream of Scotland','A Dream of Ireland',
'The Tasteful 8','Heroes of Childhood',
'Space Girls','Fine Art of Whisky','The Fine Art of Whisky',
'Friendly Mr. Z','Whiskytainment','Rumbastic',
// Common motif words
'Unicorn','Bull','Hero','Childhood',
// Distillery names, universal
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn',
'Lagavulin','Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet',
'Inchgower','Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia'
];
// =============================
// UI-Hilfen
// =============================
/**
 * Tells whether the scanner widget is present on the page.
 * @returns {boolean} true only if both the run button and the file input exist.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every(function (id) {
    return Boolean(document.getElementById(id));
  });
}
/**
 * Writes a status message into the #ados-scan-status element, if it exists.
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) {
    return;
  }
  statusEl.textContent = t ? t : '';
}
/**
 * Updates the #ados-scan-progress element, if it exists.
 * @param {?number} p Fraction done, clamped to 0..1. null/undefined hides the
 *   bar and resets its value to 0.
 */
function setProgress (p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) {
    return;
  }
  const hide = (p == null);
  bar.hidden = hide;
  bar.value = hide ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview of the selected photo inside #ados-scan-preview.
 *
 * Does nothing when the preview container is missing, so no object URL is
 * allocated needlessly. The object URL is released as soon as the image has
 * loaded (or failed to load); the decoded image stays visible after that.
 *
 * @param {Blob} file Image file chosen by the user.
 */
function showPreview (file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) {
    return;
  }
  const url = URL.createObjectURL(file);
  const release = function () { URL.revokeObjectURL(url); };

  // Build the element through the DOM instead of concatenating an HTML string.
  const img = document.createElement('img');
  img.alt = 'Vorschau';
  img.onload = release;
  img.onerror = release;
  img.src = url;

  prev.textContent = ''; // drop any previous preview
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * Displays the recognised text in #ados-scan-ocr, if that element exists.
 * @param {string} [t] Raw OCR text; it is trimmed, and falsy values show as ''.
 */
function showOCRText (t) {
  const target = document.getElementById('ados-scan-ocr');
  if (!target) {
    return;
  }
  const text = t || '';
  target.textContent = text.trim();
}
// =============================
// Tesseract laden (nur 1x)
// =============================
// Cached promise of the (single) tesseract.js load; cleared again on failure.
let tesseractReady;

/**
 * Loads tesseract.js once, trying jsDelivr first and unpkg as a fallback.
 *
 * The pending/successful load is cached so concurrent and later callers share
 * it. A failed load is NOT cached: the cache is cleared so the next call can
 * retry (e.g. after a temporary network problem) instead of receiving the same
 * rejected promise forever.
 *
 * @returns {Promise<void>} Resolves once `window.Tesseract` was already present
 *   or one of the scripts fired its load event; rejects with
 *   Error('Tesseract konnte nicht geladen werden') if both CDNs fail.
 */
function ensureTesseract () {
  if (tesseractReady) {
    return tesseractReady;
  }

  const PRIMARY_URL = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
  const FALLBACK_URL = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';

  // Injects one <script> tag and settles when it has loaded or failed.
  const loadScript = function (src) {
    return new Promise(function (resolve, reject) {
      const s = document.createElement('script');
      s.src = src;
      s.async = true;
      s.onload = function () { resolve(); };
      s.onerror = function () { reject(new Error('Failed to load ' + src)); };
      document.head.appendChild(s);
    });
  };

  const attempt = window.Tesseract
    ? Promise.resolve()
    : loadScript(PRIMARY_URL).catch(function () { return loadScript(FALLBACK_URL); });

  tesseractReady = attempt.catch(function () {
    tesseractReady = undefined; // allow a retry on the next call
    throw new Error('Tesseract konnte nicht geladen werden');
  });
  return tesseractReady;
}
// =============================
// Vorverarbeitung (OCR)
// Graustufen + Unsharp + adaptive Schwelle
// =============================
/**
 * Prepares a photo for OCR: downscale, grayscale, light sharpening and a
 * locally adaptive black/white threshold.
 *
 * Fixes over the previous version:
 *  - the temporary object URL is revoked once the image has loaded or failed;
 *  - the threshold pass reads neighbours from a snapshot of the sharpened
 *    image. It used to read from the buffer it was overwriting, so pixels that
 *    were already binarised (0/255) skewed the local mean of later pixels;
 *  - canvas dimensions are never rounded down to 0.
 *
 * @param {Blob} file Image file to process.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the binarised image
 *   (longest side at most 1800 px).
 */
async function preprocessImage (file) {
  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      o.onerror = rej;
      o.src = url;
    });
  } finally {
    // The loaded image keeps its own data; the URL is no longer needed.
    URL.revokeObjectURL(url);
  }

  // Scale so that the longer side is at most MAX (never upscale).
  const MAX = 1800;
  const s = Math.min(1, MAX / Math.max(img.width, img.height));
  const w = Math.max(1, Math.round(img.width * s));
  const h = Math.max(1, Math.round(img.height * s));

  const c = document.createElement('canvas');
  c.width = w;
  c.height = h;
  const g = c.getContext('2d', { willReadFrequently: true });
  g.imageSmoothingEnabled = true;
  g.drawImage(img, 0, 0, w, h);

  const id = g.getImageData(0, 0, w, h);
  const d = id.data;
  const at = (x, y) => 4 * (y * w + x);

  // 1) Grayscale (Rec. 709 luma weights); R, G and B all carry the gray value.
  for (let i = 0; i < d.length; i += 4) {
    const luma = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
    d[i] = d[i + 1] = d[i + 2] = luma;
  }

  // 2) Light sharpening: add 30 % of the 4-neighbour Laplacian.
  //    Border pixels are left as they are.
  const gray = new Uint8ClampedArray(d);
  for (let y = 1; y < h - 1; y++) {
    for (let x = 1; x < w - 1; x++) {
      const i0 = at(x, y);
      const centre = gray[i0];
      const lap = 4 * centre - gray[at(x - 1, y)] - gray[at(x + 1, y)] -
        gray[at(x, y - 1)] - gray[at(x, y + 1)];
      const v = Math.max(0, Math.min(255, centre + 0.3 * lap));
      d[i0] = d[i0 + 1] = d[i0 + 2] = v;
    }
  }

  // 3) Adaptive threshold against the local mean, sampled every STEP pixels
  //    within +/- RADIUS. RADIUS keeps the reach the old code effectively used
  //    (its "half" variable held the full window value of 25).
  const RADIUS = 25;
  const STEP = 5;
  const BIAS = 6; // pixels must be this much darker than the mean to turn black
  const sharp = new Uint8ClampedArray(d); // read-only source for the means
  for (let y = 0; y < h; y++) {
    const y0 = Math.max(0, y - RADIUS);
    const y1 = Math.min(h - 1, y + RADIUS);
    for (let x = 0; x < w; x++) {
      const x0 = Math.max(0, x - RADIUS);
      const x1 = Math.min(w - 1, x + RADIUS);
      let sum = 0;
      let cnt = 0;
      for (let yy = y0; yy <= y1; yy += STEP) {
        for (let xx = x0; xx <= x1; xx += STEP) {
          sum += sharp[at(xx, yy)];
          cnt++;
        }
      }
      const thr = (sum / cnt) - BIAS;
      const i = at(x, y);
      const v = sharp[i] < thr ? 0 : 255;
      d[i] = d[i + 1] = d[i + 2] = v;
    }
  }

  g.putImageData(id, 0, 0);
  return c;
}
// Hilfsfunktionen für Varianten
/**
 * Copies a rectangular region of a canvas into a new canvas of the same size.
 * @param {HTMLCanvasElement} canvas Source canvas.
 * @param {number} x Left edge of the region in the source.
 * @param {number} y Top edge of the region in the source.
 * @param {number} w Region width (also the width of the result).
 * @param {number} h Region height (also the height of the result).
 * @returns {HTMLCanvasElement} New canvas containing the region.
 */
function crop(canvas, x, y, w, h){
  const region = Object.assign(document.createElement('canvas'), { width: w, height: h });
  const ctx = region.getContext('2d');
  ctx.drawImage(canvas, x, y, w, h, 0, 0, w, h);
  return region;
}
/**
 * Draws a canvas rotated about its centre onto a new canvas.
 * For angles that are not a multiple of 180 the result's width and height are
 * swapped relative to the source.
 * @param {HTMLCanvasElement} canvas Source canvas.
 * @param {number} deg Rotation in degrees (passed to the context as radians).
 * @returns {HTMLCanvasElement} New canvas with the rotated drawing.
 */
function rotate(canvas, deg){
  const swapsAxes = (deg % 180 !== 0);
  const rotated = document.createElement('canvas');
  rotated.width = swapsAxes ? canvas.height : canvas.width;
  rotated.height = swapsAxes ? canvas.width : canvas.height;

  const ctx = rotated.getContext('2d');
  const radians = deg*Math.PI/180;
  // Move the origin to the centre, rotate, then draw the source centred on it.
  ctx.translate(rotated.width/2, rotated.height/2);
  ctx.rotate(radians);
  ctx.drawImage(canvas, -canvas.width/2, -canvas.height/2);
  return rotated;
}
/**
 * Runs one OCR pass over a canvas.
 *
 * In tesseract.js v5 the third argument of `Tesseract.recognize()` is the
 * worker options object, so recognition parameters passed there (page
 * segmentation mode, interword spaces) are not applied. This version creates
 * the worker itself, sets the parameters through `setParameters()`, and always
 * terminates the worker afterwards.
 *
 * @param {HTMLCanvasElement} canvas Image to recognise.
 * @param {string} lang Tesseract language spec, e.g. 'deu+eng'.
 * @returns {Promise<{text: string, conf: number}>} Trimmed text and the
 *   confidence reported by Tesseract (0 if absent).
 */
async function ocrOne(canvas, lang) {
  // OEM 1 (LSTM only) is what Tesseract.recognize() uses internally in v5.
  const worker = await Tesseract.createWorker(lang, 1);
  try {
    await worker.setParameters({
      // Sparse text works for labels (text blocks in different orientations).
      tessedit_pageseg_mode: '11',
      preserve_interword_spaces: '1'
    });
    const res = await worker.recognize(canvas);
    return { text: (res?.data?.text || '').trim(), conf: res?.data?.confidence || 0 };
  } finally {
    await worker.terminate();
  }
}
// =============================
// Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache
// =============================
/**
 * Runs OCR over several variants of the photo and returns the best text.
 *
 * Variants: the whole preprocessed image, its left 40 % and its bottom 28 %,
 * each unrotated and rotated by +90 and -90 degrees, each with the language
 * sets 'deu+eng' and 'eng'. The text of the attempt with the highest
 * confidence wins (the earliest attempt wins a tie).
 *
 * Changes over the previous version: the progress bar advances after every
 * attempt, it is hidden again even if preprocessing throws, and failed
 * attempts are logged instead of being dropped silently (they still do not
 * abort the run).
 *
 * @param {Blob} file Photo chosen by the user.
 * @returns {Promise<string>} Best recognised text, or '' if every attempt failed.
 */
async function runOCR(file){
  await ensureTesseract();
  setProgress(0.01);
  try {
    const base = await preprocessImage(file);

    // Candidate regions
    const regions = [
      base, // whole image
      crop(base, 0, 0, Math.round(base.width*0.4), base.height), // left column
      crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28)) // bottom banner
    ];

    // Each region as is, plus both quarter turns
    const candidates = [];
    for (const region of regions){
      candidates.push(region, rotate(region, 90), rotate(region, -90));
    }

    const langs = ['deu+eng','eng'];
    const total = candidates.length * langs.length;
    let done = 0;
    let best = null;
    for (const canv of candidates){
      for (const lang of langs){
        try {
          const r = await ocrOne(canv, lang);
          // Strictly greater: the first attempt with the top confidence wins.
          if (best === null || (r.conf||0) > (best.conf||0)) best = r;
        } catch (e) {
          // Best effort: a single failed attempt must not abort the scan.
          console.warn('[LabelScan] OCR attempt failed (' + lang + ')', e);
        }
        done++;
        setProgress(Math.max(0.01, done / total));
      }
    }
    return (best && best.text) || '';
  } finally {
    setProgress(null);
  }
}
// =============================
// Hinweise aus OCR
// =============================
/**
 * Derives search hints from raw OCR text.
 *
 * Fix: the normalised text was declared `const` but appended to with `+=`,
 * which threw a TypeError whenever a "Tasteful 8" or "Heroes of Childhood"
 * label was recognised in a different spelling/case.
 *
 * @param {string} text Raw OCR output (may be empty or null).
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names - entries of KNOWN_TOKENS found as whole words (case-insensitive);
 *   ages  - distinct 1-2 digit numbers followed by years/yo/jahr(e);
 *   years - distinct four-digit years starting with 19 or 20;
 *   words - the first 8 distinct capitalised words of at least 4 characters;
 *   raw   - whitespace-normalised text (canonical series names may be appended).
 */
function extractHints (text) {
  let raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Special case "The Tasteful 8 / Heroes of Childhood": append the canonical
  // spelling so the exact phrase is present in `raw` for later use.
  if (/TASTEFUL\s*8/i.test(raw) && !raw.includes('The Tasteful 8')) {
    raw += ' The Tasteful 8';
  }
  if (/HEROES\s+OF\s+CHILDHOOD/i.test(raw) && !raw.includes('Heroes of Childhood')) {
    raw += ' Heroes of Childhood';
  }

  // Known phrases, matched as whole words with regex metacharacters escaped.
  const names = KNOWN_TOKENS.filter(t => {
    const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
    return re.test(raw);
  });

  // Distinct values in order of first appearance.
  const distinct = list => [...new Set(list)];

  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  const ages = distinct(Array.from(raw.matchAll(ageRe), m => m[1]));

  const yearRe = /\b(19|20)\d{2}\b/g;
  const years = distinct(Array.from(raw.matchAll(yearRe), m => m[0]));

  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const words = distinct(raw.match(wordRe) || []).slice(0, 8);

  return { names, ages, years, words, raw };
}
// =============================
// Suche (3 Pässe) + Fallbacks
// =============================
/**
 * HTML-escapes a value via mw.html.escape.
 * @param {*} s Value to escape; falsy values are treated as ''.
 * @returns {string} Escaped string.
 */
function esc (s) {
  const text = String(s || '');
  return mw.html.escape(text);
}
/**
 * Builds the category filter appended to search queries.
 *
 * Fix: the categories used to be emitted as separate `incategory:"…"` terms.
 * Separate search terms are combined with AND, so a page had to be in every
 * configured category at once. CirrusSearch treats pipe-separated names inside
 * a single `incategory:` keyword as OR, which is the intent here (a page in
 * any of the configured categories).
 *
 * @returns {string} `incategory:"A|B|…"`, or '' when no categories are configured.
 */
function incatStr () {
  const cats = (ADOS_CATEGORIES || [])
    // '"' would end the quoted value and '|' would split a name in two.
    .map(c => String(c).replace(/["|]/g, ' ').trim())
    .filter(Boolean);
  return cats.length ? 'incategory:"' + cats.join('|') + '"' : '';
}
/**
 * Searches the wiki in up to three passes and returns de-duplicated hits.
 *
 *  1. precise `intitle:` combinations of each name with ages/years,
 *  2. one quoted full-text query built from the strongest hints,
 *  3. a title prefix search on the first name (or first word).
 * Passes 1 and 2 are restricted by the category filter from incatStr().
 * The search stops as soon as `limit` distinct titles were collected.
 *
 * Changes over the previous version:
 *  - 'mediawiki.html' is no longer requested. `mw.html` is provided by the
 *    always-loaded base module; as far as I know there is no ResourceLoader
 *    module of that name, and requesting an unknown module makes
 *    mw.loader.using() reject, which would fail every search. Dropping it is
 *    harmless either way.
 *  - identical queries are sent only once and the category filter is built
 *    once instead of per query.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 *   Hints as produced by extractHints().
 * @param {number} [limit=12] Maximum number of results.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Search hits in
 *   discovery order (prefix-search hits carry an empty snippet).
 */
async function searchWikiSmart (hints, limit) {
  // mediawiki.util is loaded here because rendering the hits uses mw.util.getUrl.
  await mw.loader.using(['mediawiki.api','mediawiki.util']);
  const api = new mw.Api();
  const ns0 = 0;
  const MAX = limit || 12;
  const incat = incatStr();
  const withCats = q => `${q} ${incat}`.trim();

  // PASS 1: intitle combinations (precise)
  const pass1 = [];
  hints.names.forEach(n => {
    hints.ages.forEach(a => pass1.push(withCats(`intitle:"${n}" intitle:${a}`)));
    hints.years.forEach(y => pass1.push(withCats(`intitle:"${n}" "${y}"`)));
    pass1.push(withCats(`intitle:"${n}"`));
  });

  // PASS 2: full-text search over the strongest hints
  const key = [
    ...hints.names.slice(0, 2),
    ...hints.ages.slice(0, 1),
    ...hints.years.slice(0, 1),
    ...hints.words.slice(0, 3)
  ].map(x => `"${x}"`).join(' ');
  const pass2 = key ? [ withCats(key) ] : [];

  // PASS 3: prefix on titles
  const pass3 = [];
  if (hints.names.length) pass3.push(hints.names[0]);
  if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

  const seen = new Set();
  const out = [];
  // Adds every hit whose title has not been collected yet.
  const collect = (title, item) => {
    if (seen.has(title)) return;
    seen.add(title);
    out.push(item);
  };

  // Set keeps insertion order, so pass 1 still runs before pass 2.
  const queries = [...new Set([...pass1, ...pass2])];
  for (const q of queries) {
    const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
    (r.query?.search || []).forEach(it => collect(it.title, it));
    if (out.length >= MAX) return out.slice(0, MAX);
  }

  // Prefix (list=prefixsearch)
  for (const p of pass3) {
    const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
    (r.query?.prefixsearch || []).forEach(it => {
      const title = it.title || it['*'];
      collect(title, { title, snippet: '' });
    });
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
// ganz einfacher Fuzzy-Fallback auf Suchergebnissen
/**
 * Scores how well a page title matches the OCR hints (substring matches on the
 * lower-cased title).
 * Each matching name adds 1.0; each matching word, age and year adds 0.4.
 * @param {string} title Page title.
 * @param {{names: string[], words: string[], ages: string[], years: string[]}} hints
 * @returns {number} Sum of the weights of all matching hints.
 */
function scoreTitle(title, hints){
  const haystack = String(title||'').toLowerCase();
  const lower = v => v.toLowerCase();
  // [hint list, weight per match, how a hint is turned into the needle]
  const groups = [
    [hints.names, 1.0, lower],
    [hints.words, 0.4, lower],
    [hints.ages, 0.4, String],
    [hints.years, 0.4, String]
  ];
  let score = 0;
  for (const [tokens, weight, toNeedle] of groups) {
    for (const token of tokens) {
      if (haystack.includes(toNeedle(token))) {
        score += weight;
      }
    }
  }
  return score;
}
/**
 * Fallback search: one broad, category-restricted query whose results are
 * re-ranked locally with scoreTitle().
 * Fetches up to 50 hits, sorts them by descending score, keeps the first
 * `limit` and drops those scoring below 0.10.
 * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
 * @param {number} [limit=12] Maximum number of results.
 * @returns {Promise<Object[]>} Search hits, each extended with a `_score` field.
 */
async function fallbackFuzzyTitles(hints, limit){
  await mw.loader.using('mediawiki.api');
  const client = new mw.Api();
  const maxHits = limit || 12;

  // Broad query: quoted tokens followed by the category filter.
  const tokens = [
    ...hints.names.slice(0,2),
    ...hints.ages.slice(0,1),
    ...hints.years.slice(0,1),
    ...hints.words.slice(0,3)
  ];
  const quoted = tokens.map(tok => `"${tok}"`).join(' ');
  const restricted = `${quoted} ${incatStr()}`.trim();
  // If nothing at all could be built, search the first six raw OCR words.
  const srsearch = restricted || hints.raw.split(/\s+/).slice(0,6).join(' ');

  const response = await client.get({ action:'query', list:'search', srsearch:srsearch, srlimit:50, formatversion:2 });
  const ranked = (response.query?.search || [])
    .map(hit => ({ ...hit, _score: scoreTitle(hit.title, hints) }));
  ranked.sort((left, right) => right._score - left._score);

  return ranked
    .slice(0, maxHits)
    .filter(hit => hit._score >= 0.10); // deliberately generous
}
/**
 * Last-resort search without any category restriction.
 * Uses the quoted hint tokens; when there are none, the first six
 * whitespace-separated words of the raw OCR text.
 * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
 * @param {number} [limit=12] Maximum number of results requested from the API.
 * @returns {Promise<Object[]>} Search hits as returned by the API ([] if none).
 */
async function broadSearchNoCategory(hints, limit){
  await mw.loader.using('mediawiki.api');
  const client = new mw.Api();
  const maxHits = limit || 12;

  const tokens = [
    ...hints.names.slice(0,2),
    ...hints.ages.slice(0,1),
    ...hints.years.slice(0,1),
    ...hints.words.slice(0,3)
  ];
  let srsearch;
  if (tokens.length) {
    srsearch = tokens.map(tok => `"${tok}"`).join(' ');
  } else {
    srsearch = hints.raw.split(/\s+/).slice(0,6).join(' ');
  }

  const response = await client.get({ action:'query', list:'search', srsearch:srsearch, srlimit:maxHits, formatversion:2 });
  return response.query?.search || [];
}
// =============================
// Ergebnisse rendern
// =============================
/**
 * Renders up to 12 search hits into #ados-scan-results.
 *
 * Each hit becomes `<div class="ados-hit"><b><a href="…">title</a></b>
 * <div class="meta">snippet</div></div>`; the snippet block is omitted when
 * the snippet is empty. With no hits a single explanatory entry is shown.
 *
 * The markup is built from DOM nodes instead of concatenated HTML. The API
 * snippet (HTML with highlight spans) is reduced to plain text through an
 * inert DOMParser document, so no markup from the response is inserted into
 * the page. This also replaces a quote replacement that had no effect.
 *
 * @param {Array<{title: string, snippet: string}>} items Search hits.
 */
function renderResults (items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';

  if (!items || !items.length) {
    const none = document.createElement('div');
    none.className = 'ados-hit';
    none.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(none);
    return;
  }

  const parser = new DOMParser();
  // Strips tags and decodes entities without touching the live document.
  const toPlainText = function (html) {
    const doc = parser.parseFromString(String(html || ''), 'text/html');
    return ((doc.body && doc.body.textContent) || '').trim();
  };

  items.slice(0, 12).forEach(function (it) {
    const title = String(it.title || '');

    const link = document.createElement('a');
    link.href = mw.util.getUrl(title.replace(/ /g, '_'));
    link.textContent = title;

    const bold = document.createElement('b');
    bold.appendChild(link);

    const hit = document.createElement('div');
    hit.className = 'ados-hit';
    hit.appendChild(bold);

    const snippet = toPlainText(it.snippet);
    if (snippet) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snippet;
      hit.appendChild(meta);
    }
    box.appendChild(hit);
  });
}
// =============================
// Binding
// =============================
// Set once the event handlers have been attached, so bind() runs only once.
let BOUND = false;

/**
 * Attaches the scanner's event handlers to the widget.
 *
 * Safe to call repeatedly: it returns immediately when already bound, when the
 * widget is not on the page yet, or when the run button is already marked with
 * data-bound="1".
 *
 * Wires up: the big button (opens the file picker), the file input (shows a
 * preview) and the run button / form submit (OCR, then the wiki search with
 * its two fallbacks, then rendering of the hits).
 */
function bind () {
  if (BOUND) { return; }
  if (!hasUI()) { return; }

  const runBtn = document.getElementById('ados-scan-run');
  const fileIn = document.getElementById('ados-scan-file');
  if (!runBtn || !fileIn) { return; }
  if (runBtn.dataset.bound === '1') { return; }
  runBtn.dataset.bound = '1';
  BOUND = true;

  const bigBtn = document.getElementById('ados-scan-bigbtn');
  const form = document.getElementById('ados-scan-form');

  if (bigBtn) {
    bigBtn.addEventListener('click', () => { fileIn.click(); });
  }
  fileIn.addEventListener('change', () => {
    const picked = fileIn.files && fileIn.files[0];
    if (picked) { showPreview(picked); }
  });

  // Searches tried in order while there are still no hits.
  const fallbacks = [
    ['Kein direkter Treffer – Fuzzy über Kategorien …', fallbackFuzzyTitles],
    ['Kein Treffer – breite Suche ohne Kategorien …', broadSearchNoCategory]
  ];

  // Full pipeline for one photo; reports its own errors through the status line.
  async function recognizeAndSearch (photo) {
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(photo);
      showOCRText(text);

      setStatus('Suche im Wiki …');
      const hints = extractHints(text);
      let hits = await searchWikiSmart(hints, 12);
      for (const [message, search] of fallbacks) {
        if (hits && hits.length) { break; }
        setStatus(message);
        hits = await search(hints, 12);
      }

      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = 'Erkennen & suchen';
    }
  }

  function onSubmit (ev) {
    ev.preventDefault();
    const photo = fileIn.files && fileIn.files[0];
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    // Not awaited on purpose: the pipeline handles its own errors.
    recognizeAndSearch(photo);
  }

  runBtn.addEventListener('click', onSubmit);
  if (form) { form.addEventListener('submit', onSubmit); }

  // Safety net: keep the button clickable above other content.
  const wrap = document.getElementById('ados-labelscan');
  if (wrap) { wrap.style.position = 'relative'; }
  Object.assign(runBtn.style, {
    position: 'relative',
    zIndex: '9999',
    pointerEvents: 'auto'
  });
}
// Bootstrap: try to bind now (or on DOMContentLoaded), retry twice shortly
// afterwards, and watch the DOM in case the widget is inserted later.
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
setTimeout(bind, 250);
setTimeout(bind, 1000);
// Only observe while the widget is still unbound, and stop observing once
// binding has happened. The observer used to stay attached for the lifetime of
// the page and fired on every DOM mutation.
if (!BOUND) {
  var mo = new MutationObserver(function () {
    bind();
    if (BOUND) mo.disconnect();
  });
  mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();