/*
 * MediaWiki:Gadget-LabelScan.js
 * Erscheinungsbild
 * Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
 * - Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
 * - Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
 * - Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
 */
/* global mw, Tesseract */
(function () {
'use strict';
// === ADOS: categories & tokens (extend as needed) =========================
// Wiki category names; the search code turns each one into an
// incategory:"…" filter term.
const ADOS_CATEGORIES = [
  'Alle A Dream of Scotland Abfüllungen',
  'Alle A Dream of Ireland Abfüllungen',
  'Alle A Dream of... – Der Rest der Welt Abfüllungen',
  'Friendly Mr. Z Whiskytainment Abfüllungen',
  'Die Whisky Elfen Abfüllungen',
  'The Fine Art of Whisky Abfüllungen',
  'Alle Rumbastic Abfüllungen'
];
// Names looked up (case-insensitively, as whole words) in the OCR text.
// The order is significant: matches are reported in list order.
const KNOWN_TOKENS = [
  'Ardbeg',
  'Ardmore',
  'Arran',
  'Auchroisk',
  'Ben Nevis',
  'Blair Athol',
  'Bowmore',
  'Caol Ila',
  'Clynelish',
  'Glenallachie',
  'Glenrothes',
  'Longmorn',
  'Lagavulin',
  'Tullibardine',
  'Dalmore',
  'Benrinnes',
  'Mortlach',
  'Glenlivet',
  'Inchgower',
  'Islay',
  'Speyside',
  'Highland',
  'Lowland',
  'Campbeltown',
  'Ireland'
];
// === UI Präsenz ===========================================================
/**
 * Reports whether the scan widget's two mandatory controls exist in the DOM.
 * @returns {boolean} true only if both the run button and the file input are present.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  // every() stops at the first missing element, like the && chain it replaces.
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
// === UI Helpers ===========================================================
/**
 * Writes a status message into the #ados-scan-status element, if it exists.
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) {
    return;
  }
  statusEl.textContent = t ? t : '';
}
/**
 * Updates the #ados-scan-progress element, if it exists.
 * @param {?number} p Progress fraction; clamped to the range 0..1.
 *   null or undefined hides the bar and resets its value to 0.
 */
function setProgress (p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) {
    return;
  }
  const idle = (p === null || p === undefined);
  bar.hidden = idle;
  bar.value = idle ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a thumbnail of the chosen file inside #ados-scan-preview.
 *
 * Does nothing (and allocates no object URL) when the preview container is
 * missing. The temporary object URL is released as soon as the image has
 * loaded or failed to load, so repeated file selections do not accumulate
 * blob references.
 *
 * @param {Blob} file Image file selected by the user.
 */
function showPreview (file) {
  var prev = document.getElementById('ados-scan-preview');
  if (!prev) return;
  var url = URL.createObjectURL(file);
  var img = document.createElement('img');
  var release = function () { URL.revokeObjectURL(url); };
  img.alt = 'Vorschau';
  // Register the handlers before assigning src so neither event can be missed.
  img.onload = release;
  img.onerror = release;
  img.src = url;
  // Replace any previous preview; built from nodes rather than an HTML string.
  prev.textContent = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
// === Tesseract bei Bedarf laden ==========================================
// Cached promise for the in-flight or completed library load; cleared again
// when loading fails so that a later call can retry.
var tesseractReady;
/**
 * Loads tesseract.js on demand, trying a second CDN if the first one fails.
 *
 * Concurrent and later calls share one promise. Unlike a plain cached
 * promise, a failed load is NOT remembered: the cache is reset on rejection,
 * so the user's next attempt triggers a fresh download instead of failing
 * instantly forever.
 *
 * @returns {Promise<void>} Resolves once a script has loaded (or immediately
 *   if window.Tesseract is already set); rejects with an Error if both CDNs fail.
 */
function ensureTesseract () {
  if (tesseractReady) return tesseractReady;
  var sources = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
  ];
  // Injects one <script> tag and settles when it loads or errors.
  function loadScript (src) {
    return new Promise(function (resolve, reject) {
      var s = document.createElement('script');
      s.src = src;
      s.async = true;
      s.onload = function () { resolve(); };
      s.onerror = function () { reject(new Error('Skript konnte nicht geladen werden: ' + src)); };
      document.head.appendChild(s);
    });
  }
  var attempt = window.Tesseract
    ? Promise.resolve()
    : loadScript(sources[0]).catch(function () { return loadScript(sources[1]); });
  tesseractReady = attempt.catch(function () {
    // Forget the failure so the next call starts over.
    tesseractReady = undefined;
    throw new Error('Tesseract konnte nicht geladen werden');
  });
  return tesseractReady;
}
// === Bild-Vorverarbeitung (für bessere OCR) ===============================
/**
 * Decodes an image file and prepares it for OCR: rescales it so the longest
 * edge is 1800 px (smaller images are scaled up as well), converts it to
 * grayscale and applies a mild contrast boost.
 *
 * The temporary object URL used for decoding is always revoked, on success
 * and on failure alike.
 *
 * @param {Blob} file Image file to process.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image.
 * @throws {Error} If the browser cannot load the file as an image.
 */
async function preprocessImage (file) {
  const objectUrl = URL.createObjectURL(file);
  try {
    const img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
      o.src = objectUrl;
    });
    // Scale the longest edge to ~1800px.
    const MAX = 1800;
    const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
    const w = Math.round(img.width * scale);
    const h = Math.round(img.height * scale);
    const c = document.createElement('canvas');
    c.width = w; c.height = h;
    const ctx = c.getContext('2d');
    ctx.imageSmoothingEnabled = true;
    ctx.drawImage(img, 0, 0, w, h);
    // Grayscale (Rec. 709 luma weights) plus a 15 % contrast stretch around mid-gray.
    const id = ctx.getImageData(0, 0, w, h);
    const d = id.data;
    for (let i = 0; i < d.length; i += 4) {
      const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
      const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
      d[i] = d[i + 1] = d[i + 2] = v;
    }
    ctx.putImageData(id, 0, 0);
    return c; // the canvas is handed to Tesseract
  } finally {
    // Released only after the pixels have been drawn (or loading failed).
    URL.revokeObjectURL(objectUrl);
  }
}
// === OCR (nutzt Vorverarbeitung) =========================================
/**
 * Runs OCR on an image file and returns the recognized text.
 *
 * Loads Tesseract if needed, preprocesses the image, and mirrors recognition
 * progress into the progress bar. The progress bar is hidden again in every
 * case, including when preprocessing or recognition throws.
 *
 * @param {Blob} file Image file to recognize.
 * @returns {Promise<string>} Recognized text, or '' if the result has none.
 */
async function runOCR (file) {
  await ensureTesseract();
  setProgress(0);
  try {
    const canvas = await preprocessImage(file);
    const res = await Tesseract.recognize(canvas, 'deu+eng', {
      // psm 6: a single block of text – robust for labels.
      // NOTE(review): confirm that Tesseract.recognize() applies these two
      // engine parameters when passed in the options object; they may need
      // worker.setParameters() instead.
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
      logger: function (m) {
        if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
          setProgress(m.progress);
        }
      }
    });
    return (res && res.data && res.data.text) || '';
  } finally {
    setProgress(null);
  }
}
// === Hinweise aus OCR extrahieren ========================================
/**
 * Extracts search hints from raw OCR text.
 *
 * @param {string} text OCR output; null/undefined is treated as ''.
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names – entries of KNOWN_TOKENS found as whole words (case-insensitive),
 *           in KNOWN_TOKENS order;
 *   ages  – unique age statements such as "12 years", "12 yo", "12-year-old",
 *           "14 Jahre" (digits only, 1–99);
 *   years – unique four-digit years starting with 19 or 20;
 *   words – up to 8 unique capitalized words of at least 4 characters;
 *   raw   – the input with whitespace runs collapsed to single spaces and trimmed.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Known names that actually occur in the text.
  const names = KNOWN_TOKENS.filter(token => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  // Age: the separator may be a space or a hyphen so "12-year-old" is covered.
  const ageRe = /\b([1-9]\d?)[\s-]?(?:years?|yo|jahre?)\b/gi;
  const ages = [...new Set(Array.from(raw.matchAll(ageRe), m => m[1]))];

  // Vintages.
  const years = [...new Set(raw.match(/\b(?:19|20)\d{2}\b/g) || [])];

  // A few prominent capitalized words. \b only understands ASCII word
  // characters, which would drop words that start or end with Ä/Ö/Ü/ä/ö/ü/ß,
  // so word edges are spelled out explicitly. A hyphen counts as part of a
  // word only when a letter follows it.
  const LETTERS = 'A-Za-zÄÖÜäöüß';
  const wordRe = new RegExp(
    '(?:^|[^' + LETTERS + '0-9_])' +
    '([A-ZÄÖÜ](?:[' + LETTERS + ']|-(?=[' + LETTERS + '])){3,})' +
    '(?![' + LETTERS + '0-9_])',
    'g'
  );
  const words = [];
  for (const m of raw.matchAll(wordRe)) {
    if (words.includes(m[1])) continue;
    words.push(m[1]);
    if (words.length >= 8) break;
  }

  return { names, ages, years, words, raw };
}
// === Smarte Wiki-Suche (3 Pässe) =========================================
/**
 * Searches the wiki (namespace 0) for pages matching the OCR hints, in three
 * passes of decreasing precision, and returns de-duplicated hits.
 *
 *   1. Title queries: every found name combined with each age and each year,
 *      plus the bare name.
 *   2. One full-text query built from the first names/age/year/words.
 *   3. A title prefix search on the first name, or on the first word if no
 *      name was found.
 *
 * Requests run one after another; the search stops as soon as `limit` unique
 * titles have been collected.
 *
 * NOTE(review): the category filter joins several incategory:"…" terms with
 * spaces. Depending on the wiki's search backend this may require a page to
 * be in ALL listed categories — confirm that this is intended.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 * @param {number} [limit] Maximum number of results; falsy values mean 12.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Search hits are
 *   the API's own result objects; prefix hits carry an empty snippet.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const NAMESPACE = 0;
  const cap = limit || 12;
  const categoryFilter = ADOS_CATEGORIES
    .map(category => 'incategory:"' + category + '"')
    .join(' ');

  // Pass 1: precise title combinations, grouped per name.
  const titleQueries = hints.names.flatMap(name => [
    ...hints.ages.map(age => `intitle:"${name}" intitle:${age} ${categoryFilter}`),
    ...hints.years.map(year => `intitle:"${name}" "${year}" ${categoryFilter}`),
    `intitle:"${name}" ${categoryFilter}`
  ]);

  // Pass 2: a single weighted full-text query (skipped when there are no terms).
  const phrase = [
    ...hints.names.slice(0, 2),
    ...hints.ages.slice(0, 1),
    ...hints.years.slice(0, 1),
    ...hints.words.slice(0, 3)
  ].map(term => `"${term}"`).join(' ');
  const fullTextQueries = phrase ? [`${phrase} ${categoryFilter}`] : [];

  // Pass 3: title prefix – the first name wins over the first word.
  let prefixes = [];
  if (hints.names.length) {
    prefixes = [hints.names[0]];
  } else if (hints.words.length) {
    prefixes = [hints.words[0]];
  }

  const knownTitles = new Set();
  const results = [];
  const addUnique = (title, entry) => {
    if (knownTitles.has(title)) return;
    knownTitles.add(title);
    results.push(entry);
  };

  for (const query of [...titleQueries, ...fullTextQueries]) {
    const response = await api.get({
      action: 'query',
      list: 'search',
      srsearch: query,
      srnamespace: NAMESPACE,
      srlimit: cap,
      formatversion: 2
    });
    for (const hit of response.query?.search || []) {
      addUnique(hit.title, hit);
    }
    if (results.length >= cap) return results.slice(0, cap);
  }

  for (const prefix of prefixes) {
    const response = await api.get({
      action: 'query',
      list: 'prefixsearch',
      pssearch: prefix,
      psnamespace: NAMESPACE,
      pslimit: cap
    });
    for (const hit of response.query?.prefixsearch || []) {
      const title = hit.title || hit['*'];
      addUnique(title, { title, snippet: '' });
    }
    if (results.length >= cap) break;
  }
  return results.slice(0, cap);
}
// === HTML Escaping & Treffer-Rendering ===================================
/**
 * HTML-escapes a value via mw.html.escape.
 * @param {*} s Value to escape; any falsy value is treated as the empty string.
 * @returns {string} Result of mw.html.escape for the stringified value.
 */
function esc (s) {
  const text = s ? String(s) : '';
  return mw.html.escape(text);
}
/**
 * Renders up to 12 search hits into #ados-scan-results, replacing whatever
 * was shown before. Shows a "no clear hits" message for an empty list.
 *
 * Titles and link targets are set through DOM properties instead of being
 * concatenated into an HTML string, so they can never break out of the
 * markup regardless of the characters they contain.
 *
 * @param {Array<{title: string, snippet: (string|undefined)}>} items Hits to display.
 */
function renderResults (items) {
  var box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';
  if (!items || !items.length) {
    var empty = document.createElement('div');
    empty.className = 'ados-hit';
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }
  items.slice(0, 12).forEach(function (it) {
    var title = String(it.title || '');
    var anchor = document.createElement('a');
    // getUrl() performs the wiki URL encoding itself (spaces become underscores).
    anchor.href = mw.util.getUrl(title);
    anchor.textContent = title;
    var bold = document.createElement('b');
    bold.appendChild(anchor);
    var hit = document.createElement('div');
    hit.className = 'ados-hit';
    hit.appendChild(bold);
    // Drop the highlight <span> wrappers from the snippet.
    var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '');
    if (snip) {
      var meta = document.createElement('div');
      meta.className = 'meta';
      // NOTE(review): the snippet is inserted as HTML so that entities are
      // decoded; this assumes the search API returns snippets already
      // HTML-escaped apart from the highlight spans — confirm.
      meta.innerHTML = snip;
      hit.appendChild(meta);
    }
    box.appendChild(hit);
  });
}
// === Binding ==============================================================
// Set once the handlers have been attached; later bind() calls are no-ops.
var BOUND = false;
/**
 * Attaches the widget's event handlers exactly once.
 *
 * Returns without doing anything while the UI is not in the DOM yet, or if
 * the run button is already marked with data-bound="1". After binding:
 *   - the optional big button opens the file picker,
 *   - choosing a file shows a preview,
 *   - the run button performs OCR, hint extraction, wiki search and result
 *     rendering, reporting progress through the status line.
 * Finally the run button is forced above possible overlays via inline styles.
 */
function bind () {
  if (BOUND) return;
  if (!hasUI()) return;

  const runBtn = document.getElementById('ados-scan-run');
  const fileIn = document.getElementById('ados-scan-file');
  const bigBtn = document.getElementById('ados-scan-bigbtn');
  if (!(runBtn && fileIn)) return;
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  // First selected file, or null when nothing is selected.
  const selectedFile = () => (fileIn.files && fileIn.files[0]) || null;

  // Full scan pipeline, triggered by the run button.
  async function onRunClick (ev) {
    ev.preventDefault();
    const photo = selectedFile();
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(photo);
      setStatus('Suche im Wiki …');
      const hints = extractHints(text);
      const hits = await searchWikiSmart(hints, 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      // Always give the button back, whether the scan succeeded or not.
      runBtn.disabled = false;
      runBtn.textContent = 'Erkennen & suchen';
    }
  }

  if (bigBtn) {
    bigBtn.addEventListener('click', () => { fileIn.click(); });
  }
  fileIn.addEventListener('change', () => {
    const chosen = selectedFile();
    if (chosen) showPreview(chosen);
  });
  runBtn.addEventListener('click', onRunClick);

  // Safety net against overlays that would swallow clicks on the button.
  const wrap = document.getElementById('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  Object.assign(runBtn.style, {
    position: 'relative',
    zIndex: '9999',
    pointerEvents: 'auto'
  });
}
// Initial binding plus fallbacks: bind now (or on DOMContentLoaded), retry
// after 250 ms and 1 s, and watch the DOM for a widget inserted later.
// The observer covers the whole document, so it is disconnected as soon as
// binding has succeeded instead of firing on every DOM change for the rest
// of the page's lifetime.
var mo = new MutationObserver(function () { attemptBind(); });
function attemptBind () {
  if (!BOUND) bind();
  // disconnect() is harmless when the observer is not (or no longer) observing.
  if (BOUND) mo.disconnect();
}
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', attemptBind);
} else {
  attemptBind();
}
setTimeout(attemptBind, 250);
setTimeout(attemptBind, 1000);
// Only start observing if the immediate attempt did not already bind.
if (!BOUND) {
  mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();