/*
 * MediaWiki:Gadget-LabelScan.js
 * Erscheinungsbild
 * Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
 * - Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
 * - Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
 * - Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
 */
/* global mw, Tesseract */
(function () {
'use strict';
// ========== ADOS: categories & distillery tokens ==========
// Category names used to build `incategory:` filters and to list category
// members. Left empty, i.e. the category filter is switched OFF for testing.
const ADOS_CATEGORIES = [];

// Distillery and region names that are looked up (case-insensitively, as
// whole words) in the OCR text.
const KNOWN_TOKENS = [
  'Ardbeg',
  'Ardmore',
  'Arran',
  'Auchroisk',
  'Ben Nevis',
  'Blair Athol',
  'Bowmore',
  'Caol Ila',
  'Clynelish',
  'Glenallachie',
  'Glenrothes',
  'Longmorn',
  'Lagavulin',
  'Tullibardine',
  'Dalmore',
  'Benrinnes',
  'Mortlach',
  'Glenlivet',
  'Inchgower',
  'Islay',
  'Speyside',
  'Highland',
  'Lowland',
  'Campbeltown',
  'Ireland'
];
// ========== Utility: normalization & fuzzy matching ==========
/**
 * Normalizes text for comparison.
 *
 * Lower-cases the input, strips combining diacritical marks (after NFD
 * decomposition), replaces every character that is not a-z, 0-9, whitespace
 * or "-" with a space, collapses whitespace runs and trims the result.
 *
 * @param {*} s Value to normalize; falsy values are treated as ''.
 * @returns {string} The normalized string.
 */
function norm(s){
  const lowered = String(s || '').toLowerCase();
  // Decompose first so accents become separate combining marks we can drop.
  const withoutMarks = lowered.normalize('NFD').replace(/[\u0300-\u036f]/g,'');
  const plain = withoutMarks.replace(/[^a-z0-9\s\-]/g,' ');
  return plain.replace(/\s+/g,' ').trim();
}
/**
 * Levenshtein edit distance between two strings, computed on their
 * normalized forms (see `norm`).
 *
 * @param {*} a First value.
 * @param {*} b Second value.
 * @returns {number} Minimum number of single-character insertions, deletions
 *   and substitutions turning normalized `a` into normalized `b`.
 */
function levenshtein(a,b){
  const left = norm(a);
  const right = norm(b);
  if (left.length === 0) return right.length;
  if (right.length === 0) return left.length;

  // Row for the empty prefix of `left`: distance equals the prefix length of `right`.
  let previous = [];
  for (let col = 0; col <= right.length; col++) previous.push(col);

  for (let row = 1; row <= left.length; row++) {
    const current = [row];
    for (let col = 1; col <= right.length; col++) {
      if (left[row - 1] === right[col - 1]) {
        current[col] = previous[col - 1];
      } else {
        // substitution, insertion, deletion
        current[col] = 1 + Math.min(previous[col - 1], current[col - 1], previous[col]);
      }
    }
    previous = current;
  }
  return previous[right.length];
}
/**
 * Splits a string into its set of distinct normalized tokens.
 *
 * @param {*} str Text to tokenize (normalized via `norm`, split on spaces).
 * @returns {Set<string>} Unique non-empty tokens.
 */
function tokenSet(str){
  const pieces = norm(str).split(' ');
  const nonEmpty = pieces.filter((piece) => Boolean(piece));
  return new Set(nonEmpty);
}
/**
 * Token-overlap similarity of two strings.
 *
 * @param {*} a First text.
 * @param {*} b Second text.
 * @returns {number} Number of shared tokens divided by the size of the larger
 *   token set; 0 when either side has no tokens.
 */
function overlapScore(a,b){
  const left = tokenSet(a);
  const right = tokenSet(b);
  if (left.size === 0 || right.size === 0) return 0;

  let shared = 0;
  for (const token of left) {
    if (right.has(token)) shared += 1;
  }
  return shared / Math.max(left.size, right.size);
}
/**
 * Escapes the five HTML-significant characters (& < > " ') so a value can be
 * placed inside element content or a quoted attribute.
 *
 * @param {*} s Value to escape; null/undefined become ''.
 * @returns {string} The escaped string.
 */
function escHTML (s) {
  return String(s ?? '')
    // "&" must be handled first so the entities produced below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
// ========== UI presence & helpers ==========
/**
 * Reports whether the scan UI is present in the document, i.e. both the run
 * button and the file input exist.
 *
 * @returns {boolean} True when both required elements are found.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
/**
 * Writes a status message into the #ados-scan-status element, if it exists.
 *
 * @param {string} [t] Message to show; falsy values clear the text.
 */
function setStatus (t) {
  const target = document.getElementById('ados-scan-status');
  if (!target) return;
  target.textContent = t ? t : '';
}
/**
 * Shows the recognized text (trimmed) in the #ados-scan-ocr element, if it
 * exists.
 *
 * @param {string} [t] OCR text; falsy values are shown as an empty string.
 */
function showOCRText (t) {
  const target = document.getElementById('ados-scan-ocr');
  if (!target) return;
  const text = t || '';
  target.textContent = text.trim();
}
/**
 * Updates the #ados-scan-progress element, if it exists.
 *
 * @param {?number} [p] Progress value, clamped to the range 0..1. Passing
 *   null or undefined hides the bar and resets its value to 0.
 */
function setProgress (p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) return;

  if (p === null || p === undefined) {
    bar.hidden = true;
    bar.value = 0;
    return;
  }
  bar.hidden = false;
  const capped = Math.min(1, p);
  bar.value = Math.max(0, capped);
}
/**
 * Shows a preview image of the chosen file inside #ados-scan-preview.
 *
 * Does nothing when the preview container is missing, so no object URL is
 * allocated in that case. The object URL is released as soon as the image
 * has loaded (or failed to load), so repeated selections do not accumulate
 * blob URLs.
 *
 * @param {Blob} file Image file selected by the user.
 */
function showPreview (file) {
  var prev = document.getElementById('ados-scan-preview');
  if (!prev) return;

  var url = URL.createObjectURL(file);
  var img = document.createElement('img');
  var release = function () { URL.revokeObjectURL(url); };
  img.alt = 'Vorschau';
  // Handlers are attached before `src` is set so neither event can be missed.
  img.onload = release;
  img.onerror = release;
  img.src = url;

  // Replace any previous preview with the new image.
  prev.textContent = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
// ========== Load Tesseract on demand ==========
// Cached promise of the in-flight or completed load; cleared again on failure.
var tesseractReady;
/**
 * Makes sure the Tesseract script is loaded, injecting it on first use.
 *
 * Resolves immediately when `window.Tesseract` already exists. Otherwise the
 * script is loaded from jsDelivr, falling back to unpkg if that fails. The
 * resulting promise is cached so concurrent callers share one load. If both
 * sources fail the cache is cleared, so a later call can try again instead
 * of receiving the same rejected promise forever.
 *
 * @returns {Promise<void>} Resolves once the script has loaded.
 * @throws {Error} (as rejection) 'Tesseract konnte nicht geladen werden' when
 *   neither source could be loaded.
 */
function ensureTesseract () {
  if (tesseractReady) return tesseractReady;

  var sources = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
  ];

  // Injects one <script> tag and settles when it loads or errors.
  function loadScript (src) {
    return new Promise(function (resolve, reject) {
      var s = document.createElement('script');
      s.src = src;
      s.async = true;
      s.onload = function () { resolve(); };
      s.onerror = function () { reject(new Error('script load failed: ' + src)); };
      document.head.appendChild(s);
    });
  }

  var attempt = window.Tesseract
    ? Promise.resolve()
    : loadScript(sources[0]).catch(function () { return loadScript(sources[1]); });

  tesseractReady = attempt.catch(function () {
    // Forget the failed attempt so the next call starts a fresh load.
    tesseractReady = undefined;
    throw new Error('Tesseract konnte nicht geladen werden');
  });
  return tesseractReady;
}
// ========== Image preprocessing (canvas) ==========
/**
 * Prepares an image file for OCR.
 *
 * Decodes the file, scales it so that its longest edge is 1800 px (smaller
 * images are scaled up, larger ones down), converts it to grayscale and
 * applies a mild contrast boost around mid-gray.
 *
 * The temporary object URL is always released once decoding has finished or
 * failed, so repeated scans do not leak blob URLs.
 *
 * @param {Blob} file Image file to preprocess.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image.
 */
async function preprocessImage (file) {
  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      o.onerror = rej;
      o.src = url;
    });
  } finally {
    // The decoded image no longer needs the URL.
    URL.revokeObjectURL(url);
  }

  // Scale the longest edge to ~1800px (better for OCR)
  const MAX = 1800;
  const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
  const w = Math.round(img.width * scale);
  const h = Math.round(img.height * scale);

  const c = document.createElement('canvas');
  c.width = w;
  c.height = h;
  const ctx = c.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);

  // Grayscale (Rec. 709 luma weights) + slight contrast boost
  const id = ctx.getImageData(0, 0, w, h);
  const d = id.data;
  for (let i = 0; i < d.length; i += 4) {
    const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
    const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
    d[i] = d[i + 1] = d[i + 2] = v;
  }
  ctx.putImageData(id, 0, 0);
  return c; // the canvas is handed to Tesseract
}
// ========== OCR ==========
/**
 * Runs OCR on an image file and returns the recognized text.
 *
 * Loads Tesseract if needed, preprocesses the image, then recognizes with
 * 'deu+eng'. If that attempt throws, a warning is logged and recognition is
 * retried with 'eng' only. The progress bar is reset in every case, including
 * when preprocessing or both recognition attempts fail.
 *
 * @param {Blob} file Image file to recognize.
 * @returns {Promise<string>} Recognized text, or '' when none was returned.
 * @throws Rejects with the error of the last failing step.
 */
async function runOCR (file) {
  await ensureTesseract();

  // One recognition attempt for the given language string.
  // NOTE(review): in tesseract.js v5 the two parameter keys below may only
  // take effect when set through a worker's setParameters(); confirm.
  const recognize = async (canvas, langs) => {
    const res = await Tesseract.recognize(canvas, langs, {
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
      logger: m => { if (m?.status==='recognizing text' && typeof m.progress==='number') setProgress(m.progress); }
    });
    return (res && res.data && res.data.text) || '';
  };

  setProgress(0);
  try {
    const canvas = await preprocessImage(file);
    try {
      return await recognize(canvas, 'deu+eng');
    } catch (e) {
      // Fallback: English only
      console.warn('[LabelScan] deu+eng fehlgeschlagen, versuche eng:', e);
      return await recognize(canvas, 'eng');
    }
  } finally {
    // Hide the progress bar on success and on failure alike.
    setProgress(null);
  }
}
// ========== Extract hints from the OCR text ==========
/**
 * Extracts search hints from recognized label text.
 *
 * @param {string} text Raw OCR output.
 * @returns {{names: string[], ages: string[], years: string[], words: string[],
 *   abvs: string[], series: string[], raw: string}}
 *   - names:  entries of KNOWN_TOKENS found as whole words (case-insensitive)
 *   - ages:   distinct age statements such as "12" from "12 Years"/"12 Jahre"/"12yo"
 *   - years:  distinct four-digit years starting with 19 or 20
 *   - words:  up to 8 distinct capitalized words of at least 4 characters
 *   - abvs:   distinct alcohol percentages with "." as decimal separator
 *   - series: series labels for which any of the listed keywords occurs
 *   - raw:    the input with whitespace collapsed and trimmed
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Collects distinct values produced by `pick` for every match of `re` in raw.
  const collect = (re, pick) => {
    const out = [];
    for (const m of raw.matchAll(re)) {
      const value = pick(m);
      if (!out.includes(value)) out.push(value);
    }
    return out;
  };

  // Distillery / brand tokens
  const foundNames = KNOWN_TOKENS.filter(t => {
    const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
    return re.test(raw);
  });

  // Age statements
  const ages = collect(/\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi, m => m[1]);

  // Vintages
  const years = collect(/\b(?:19|20)\d{2}\b/g, m => m[0]);

  // ABV. No word boundary after "%": "%" is not a word character, so a trailing
  // \b would only match when a letter or digit follows directly ("46%vol") and
  // would miss "46% vol" or "46 %". Up to two decimals, so "57,15 %" is read as
  // 57.15 rather than 15.
  const abvs = collect(/\b(\d{2}(?:[.,]\d{1,2})?)\s?%/g, m => m[1].replace(',', '.'));

  // Series keywords (lightweight): [label, keyword, keyword, ...]
  const seriesMap = [
    ['A Dream of Scotland','dream of scotland'],
    ['A Dream of Ireland','dream of ireland'],
    ['A Dream of... – Der Rest der Welt','rest der welt','dream of','rest of the world'],
    ['Friendly Mr. Z Whiskytainment Abfüllungen','friendly mr z','mr z'],
    ['Die Whisky Elfen Abfüllungen','whisky elfen','elfen'],
    ['The Fine Art of Whisky Abfüllungen','fine art of whisky','fine art'],
    ['Alle Rumbastic Abfüllungen','rumbastic']
  ];
  const low = norm(raw);
  const series = [];
  seriesMap.forEach(([label, ...keywords]) => {
    // Any one of the keywords is enough to suggest the series.
    if (keywords.some(k => low.includes(norm(k)))) series.push(label);
  });

  // Prominent (capitalized) words
  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const uniq = new Set();
  const words = [];
  for (const w of raw.matchAll(wordRe)) {
    const s = w[0];
    if (uniq.has(s)) continue;
    uniq.add(s);
    words.push(s);
    if (words.length >= 8) break;
  }

  return { names: foundNames, ages, years, words, abvs, series, raw };
}
// ========== Smart wiki search (3 passes) ==========
// Builds the quoted key terms shared by the full-text queries: up to two
// names, one age, one year and three prominent words.
function quotedKeyTerms (hints) {
  return []
    .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3))
    .map(x => `"${x}"`);
}

/**
 * Broad full-text search without any category filter.
 *
 * Declared next to searchWikiSmart (not inside it) so that other code in this
 * file can call it as a last-resort search.
 *
 * @param {Object} hints Result of extractHints().
 * @param {number} [limit=12] Maximum number of results.
 * @returns {Promise<Object[]>} Search result items as returned by list=search;
 *   an empty array when there is nothing to search for.
 */
async function broadSearchNoCategory (hints, limit) {
  const MAX = limit || 12;
  // Build a broad search phrase from distillery/age/year/words, falling back
  // to the first six words of the raw text.
  const parts = quotedKeyTerms(hints);
  const q = parts.length ? parts.join(' ') : hints.raw.split(/\s+/).slice(0, 6).join(' ');
  if (!q) return []; // an empty srsearch would only produce an API error

  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: 0, srlimit: MAX, formatversion: 2 });
  return (r.query?.search || []);
}

/**
 * Searches the wiki in three passes of decreasing precision and returns the
 * de-duplicated hits in the order found:
 *   1. intitle: combinations of each name with ages / years,
 *   2. one quoted full-text query built from the key terms,
 *   3. a title prefix search on the first name (or first prominent word).
 * Passes 1 and 2 get an incategory: filter for every entry of ADOS_CATEGORIES.
 * Searching stops as soon as `limit` distinct titles were collected.
 *
 * @param {Object} hints Result of extractHints().
 * @param {number} [limit=12] Maximum number of results.
 * @returns {Promise<Object[]>} Items with at least `title` and `snippet`.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const ns0 = 0;
  const MAX = limit || 12;

  const cats = ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ');
  const withCats = q => (cats ? `${q} ${cats}` : q);

  // PASS 1: intitle combinations (precise)
  const pass1 = [];
  hints.names.forEach(n => {
    hints.ages.forEach(a => pass1.push(withCats(`intitle:"${n}" intitle:${a}`)));
    hints.years.forEach(y => pass1.push(withCats(`intitle:"${n}" "${y}"`)));
    pass1.push(withCats(`intitle:"${n}"`));
  });

  // PASS 2: full-text search on the key terms
  const key = quotedKeyTerms(hints).join(' ');
  const pass2 = key ? [withCats(key)] : [];

  // PASS 3: title prefix
  const pass3 = [];
  if (hints.names.length) pass3.push(hints.names[0]);
  if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

  const seen = new Set();
  const out = [];
  // Adds an item unless a hit with the same title was already collected.
  const addUnique = (title, item) => {
    if (seen.has(title)) return;
    seen.add(title);
    out.push(item);
  };

  async function runSr (q) {
    const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
    (r.query?.search || []).forEach(it => addUnique(it.title, it));
  }

  for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
  for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }

  // Prefix (list=prefixsearch)
  for (const p of pass3) {
    const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
    (r.query?.prefixsearch || []).forEach(it => {
      const title = it.title || it['*'];
      addUnique(title, { title, snippet: '' });
    });
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
// ========== Titles from categories + fuzzy fallback ==========
/**
 * Collects the titles of all pages that are members of the categories listed
 * in ADOS_CATEGORIES, following API continuation until each category is
 * exhausted.
 *
 * @returns {Promise<string[]>} Distinct page titles in the order first seen.
 */
async function fetchTitlesFromCategories(){
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const collected = new Set();

  for (const cat of ADOS_CATEGORIES) {
    let cmcontinue;
    while (true) {
      const r = await api.get({
        action: 'query',
        list: 'categorymembers',
        cmtitle: 'Kategorie:' + cat,
        cmtype: 'page',
        cmlimit: 'max',
        cmcontinue
      });
      const members = r.query?.categorymembers || [];
      for (const member of members) collected.add(member.title);

      // Keep requesting while the API hands back a continuation token.
      cmcontinue = r.continue && r.continue.cmcontinue;
      if (!cmcontinue) break;
    }
  }
  return [...collected];
}
/**
 * Scores how well a page title fits the hints extracted from a label.
 *
 * The score is the token overlap between the title and the raw OCR text plus
 * bonuses, never below 0:
 *   +0.35 if the title contains a found name, else +0.20 if the whole title
 *         is within edit distance 4 of a name,
 *   +0.15 if an age (as a whole word) or a year occurs in the title, else
 *   +0.10 if an ABV value occurs in the title,
 *   +0.15 if the title contains a matched series name,
 *   -0.05 if the token overlap is below 0.2.
 *
 * @param {string} title Page title to score.
 * @param {Object} hints Result of extractHints().
 * @returns {number} Non-negative relevance score.
 */
function scoreTitleAgainstHints(title, hints){
  const normTitle = norm(title);

  // 1) Token overlap
  const base = overlapScore(title, hints.raw);

  // 2) Distillery boost
  let nameBoost = 0;
  hints.names.forEach(n=>{
    const t = norm(n);
    if (normTitle.includes(t)) nameBoost = Math.max(nameBoost, 0.35);
    else if (levenshtein(normTitle, t) <= 4) nameBoost = Math.max(nameBoost, 0.2);
  });

  // 3) Age / year / ABV in the title
  let numBoost = 0;
  hints.ages.forEach(a=>{
    if (new RegExp('\\b'+a+'\\b').test(normTitle)) numBoost = Math.max(numBoost, 0.15);
  });
  hints.years.forEach(y=>{
    if (normTitle.includes(y)) numBoost = Math.max(numBoost, 0.15);
  });
  hints.abvs.forEach(p=>{
    // Compare in normalized form: norm() turns the decimal separator of the
    // title into a space ("46.3%" -> "46 3"), so the hint must be normalized
    // the same way instead of having its "." removed.
    if (normTitle.includes(norm(p))) numBoost = Math.max(numBoost, 0.1);
  });

  // 4) Series bonus
  let seriesBoost = 0;
  hints.series.forEach(s=>{
    const key = norm(s.split(' Abfüllungen')[0]); // core name without the suffix
    if (normTitle.includes(key)) seriesBoost = Math.max(seriesBoost, 0.15);
  });

  // 5) Small penalty for a very low overlap
  const penalty = base < 0.2 ? -0.05 : 0;
  return Math.max(0, base + nameBoost + numBoost + seriesBoost + penalty);
}
/**
 * Fuzzy fallback: scores every title from the configured categories against
 * the hints and returns the best ones.
 *
 * The `limit` (default 12) highest-scoring titles are taken first; of those,
 * only titles scoring at least 0.10 are returned.
 *
 * @param {Object} hints Result of extractHints().
 * @param {number} [limit=12] Maximum number of candidates considered.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Hits, best first.
 */
async function fallbackFuzzyTitles(hints, limit){
  const titles = await fetchTitlesFromCategories();

  const ranked = [];
  for (const title of titles) {
    ranked.push({ title: title, _score: scoreTitleAgainstHints(title, hints) });
  }
  ranked.sort((x, y) => y._score - x._score);

  const hits = [];
  for (const candidate of ranked.slice(0, limit || 12)) {
    if (candidate._score >= 0.10) hits.push({ title: candidate.title, snippet: '' });
  }
  return hits;
}
// ========== Result rendering ==========
/**
 * Renders up to 12 search hits into #ados-scan-results (no-op if that element
 * is missing). Each hit becomes a bold link to the page, followed by the
 * snippet when there is one. With no hits a "no clear matches" notice is
 * shown instead.
 *
 * Titles and links are set through DOM properties rather than concatenated
 * into markup. The snippet is inserted as an HTML fragment so that character
 * entities in it are decoded; the highlight <span> tags are removed and any
 * remaining angle brackets and double quotes are escaped first, so the
 * fragment cannot create elements.
 *
 * @param {Array<{title: string, snippet: (string|undefined)}>} items Hits to show.
 */
function renderResults (items) {
  var box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';

  if (!items || !items.length) {
    var none = document.createElement('div');
    none.className = 'ados-hit';
    none.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(none);
    return;
  }

  // mw.util is only available when the mediawiki.util module is loaded.
  var hasUtil = typeof mw !== 'undefined' && mw && mw.util;

  items.slice(0, 12).forEach(function (it) {
    var title = it.title || '';
    var page = title.replace(/ /g, '_');
    var link = hasUtil ? mw.util.getUrl(page) : ('/wiki/' + encodeURIComponent(page));
    var snip = String(it.snippet || '')
      .replace(/<\/?span[^>]*>/g, '')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

    var div = document.createElement('div');
    div.className = 'ados-hit';

    var anchor = document.createElement('a');
    anchor.href = link;
    anchor.textContent = title;
    var strong = document.createElement('b');
    strong.appendChild(anchor);
    div.appendChild(strong);

    if (snip) {
      var meta = document.createElement('div');
      meta.className = 'meta';
      meta.innerHTML = snip;
      div.appendChild(meta);
    }
    box.appendChild(div);
  });
}
// ========== Binding ==========
// Set once the event handlers have been attached.
var BOUND = false;
/**
 * Attaches the scan UI's event handlers exactly once.
 *
 * Returns without doing anything when binding already happened, when the UI
 * is not in the document yet, or when the run button is already marked with
 * data-bound="1". Otherwise wires up: the big button (opens the file picker),
 * the file input (shows a preview), the form (submit triggers the run button)
 * and the run button itself (OCR, then wiki search with fallbacks, then
 * rendering). Finally the run button is lifted above possible overlays.
 */
function bind () {
  if (BOUND) return;
  if (!hasUI()) return;

  const byId = (id) => document.getElementById(id);
  const runBtn = byId('ados-scan-run');
  const fileIn = byId('ados-scan-file');
  const bigBtn = byId('ados-scan-bigbtn');
  const form = byId('ados-scan-form');
  if (!runBtn || !fileIn || runBtn.dataset.bound === '1') return;

  runBtn.dataset.bound = '1';
  BOUND = true;

  const isEmpty = (list) => !list || !list.length;

  // Full pipeline for one click on the run button.
  async function onRun (ev) {
    ev.preventDefault();
    const photo = fileIn.files && fileIn.files[0];
    if (!photo) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(photo);
      showOCRText(text);

      setStatus('Suche im Wiki …');
      const hints = extractHints(text);
      let hits = await searchWikiSmart(hints, 12);
      if (isEmpty(hits)) {
        setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
        hits = await fallbackFuzzyTitles(hints, 12);
      }
      if (isEmpty(hits)) {
        setStatus('Kein Treffer – breite Suche ohne Kategorien …');
        // NOTE(review): broadSearchNoCategory has to be resolvable from this
        // scope for the last fallback to work; confirm where it is declared.
        hits = await broadSearchNoCategory(hints, 12);
      }
      renderResults(hits);
      setStatus(isEmpty(hits) ? 'Keine klaren Treffer.' : 'Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = 'Erkennen & suchen';
    }
  }

  if (bigBtn) {
    bigBtn.addEventListener('click', () => { fileIn.click(); });
  }
  fileIn.addEventListener('change', () => {
    const chosen = fileIn.files && fileIn.files[0];
    if (chosen) showPreview(chosen);
  });
  if (form) {
    form.addEventListener('submit', (ev) => {
      ev.preventDefault();
      runBtn.click();
    });
  }
  runBtn.addEventListener('click', onRun);

  // Safety against overlays covering the button
  const wrap = byId('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  Object.assign(runBtn.style, { position: 'relative', zIndex: '9999', pointerEvents: 'auto' });
}
// Initial binding + fallbacks + observer
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
// Retry shortly afterwards in case the UI is inserted after load.
setTimeout(bind, 250);
setTimeout(bind, 1000);
// Watch for a UI that is inserted even later. The observer is disconnected as
// soon as binding has happened, so it does not keep running on every DOM
// mutation for the rest of the page's lifetime.
if (!BOUND) {
  var mo = new MutationObserver(function () {
    bind();
    if (BOUND) mo.disconnect();
  });
  mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();