/*
 * MediaWiki:Gadget-LabelScan.js
 * Erscheinungsbild
 * Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
 * - Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
 * - Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
 * - Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
 */
/* global mw, Tesseract */
(function () {
'use strict';
// ========= KONFIG =========
// Wiki categories used to narrow the search; add or remove category names here.
// searchWikiSmart turns every entry into an incategory:"…" search term. With an
// empty array the queries are sent without any category filter.
const ADOS_CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'Alle Rumbastic Abfüllungen'
];
// Words and phrases that frequently appear on ADOS labels. extractHints checks the
// OCR text for each entry (case-insensitive, whole-word match) and reports the
// ones it finds as `names`.
const KNOWN_TOKENS = [
// Brands / distilleries (excerpt – extend as needed)
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
'Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia',
// Series names / ADOS wording
'A Dream of Scotland','A Dream of Ireland','The Tasteful 8','Heroes of Childhood',
'Cask Strength','Single Malt','Unicorn','Space Girls','Whisky Elfen',
'The Fine Art of Whisky','Friendly Mr. Z','Rumbastic'
];
// ========= UI HILFSFUNKTIONEN =========
/**
 * Reports whether the scan UI is present in the current document.
 *
 * @returns {boolean} true only when both the run button (#ados-scan-run)
 *   and the file input (#ados-scan-file) exist.
 */
function hasUI () {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  // every() stops at the first missing element, like the original && chain.
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
/**
 * Writes a status message into #ados-scan-status.
 *
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) {
    return; // status element not on this page – nothing to update
  }
  statusEl.textContent = t || '';
}
/**
 * Updates the #ados-scan-progress bar.
 *
 * @param {?number} p Progress as a fraction, clamped to 0..1. Passing null or
 *   undefined hides the bar and resets its value to 0.
 */
function setProgress (p) {
  const progressEl = document.getElementById('ados-scan-progress');
  if (!progressEl) {
    return;
  }
  const isIdle = p === null || p === undefined;
  progressEl.hidden = isIdle;
  progressEl.value = isIdle ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview of the selected photo inside #ados-scan-preview.
 *
 * The image is built as a DOM node instead of an HTML string, and the
 * temporary object URL is released as soon as the image has loaded (or failed
 * to load) so repeated selections do not accumulate blob references.
 *
 * @param {Blob} file The chosen image file.
 */
function showPreview (file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) {
    return; // no preview container – do not allocate an object URL at all
  }
  const url = URL.createObjectURL(file);
  const release = () => URL.revokeObjectURL(url);
  const img = document.createElement('img');
  img.alt = 'Vorschau';
  // An already decoded image stays visible after its blob URL is revoked.
  img.onload = release;
  img.onerror = release;
  img.src = url;
  prev.textContent = ''; // drop any previous preview
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * Best-effort debug logger: prints the message with a "[LabelScan]" prefix and
 * ignores any error thrown by the console call itself.
 *
 * @param {*} msg Value to log.
 */
const dbg = (msg) => {
  try {
    console.log('[LabelScan]', msg);
  } catch (ignored) {
    // Logging must never break the gadget, so failures are dropped on purpose.
  }
};
// ========= TESSERACT WORKER (einmalig) =========
// Cached promise for the shared OCR worker. It is reset to null when creation
// fails so that a later call can try again instead of re-using the rejection.
let workerPromise = null;
// True once this gadget has injected tesseract.js from the CDN itself. In that
// case the library version is the one pinned in TESSERACT_CDN_URL.
let tesseractInjected = false;
const TESSERACT_CDN_URL = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
const OCR_LANGUAGES = 'eng+deu'; // English + German

/**
 * Injects the tesseract.js script from the CDN.
 *
 * @returns {Promise<void>} Resolves once the script has loaded and
 *   window.Tesseract.createWorker is available; rejects otherwise.
 */
function loadTesseractFromCdn () {
  return new Promise((resolve, reject) => {
    const s = document.createElement('script');
    s.src = TESSERACT_CDN_URL;
    s.async = true;
    s.onload = () => {
      if (!window.Tesseract?.createWorker) {
        reject(new Error('Tesseract lädt, aber createWorker fehlt'));
        return;
      }
      resolve();
    };
    s.onerror = () => reject(new Error('Tesseract konnte nicht geladen werden'));
    document.head.appendChild(s);
  });
}

/**
 * Logger callback for Tesseract: forwards recognition progress to the bar.
 *
 * @param {{status?: string, progress?: number}} m Tesseract log message.
 */
function reportOcrProgress (m) {
  if (m?.status === 'recognizing text' && typeof m.progress === 'number') {
    setProgress(m.progress);
  }
}

/**
 * Creates and configures one Tesseract worker, loading the library first when
 * it is not already on the page.
 *
 * @returns {Promise<object>} The initialised worker.
 */
async function createOcrWorker () {
  if (!window.Tesseract?.createWorker) {
    await loadTesseractFromCdn();
    tesseractInjected = true;
  }
  const lib = window.Tesseract;
  const options = { logger: reportOcrProgress };
  let worker;
  if (tesseractInjected) {
    // tesseract.js 5: createWorker(langs, oem, options) resolves to a worker
    // that already has the languages loaded and initialised.
    worker = await lib.createWorker(OCR_LANGUAGES, lib.OEM?.LSTM_ONLY ?? 1, options);
  } else {
    // NOTE(review): a library that was already on the page is assumed to expose
    // the older createWorker(options) + load/loadLanguage/initialize API that
    // this code was written against – confirm which version the wiki ships.
    // `await` covers both the synchronous and the promise-returning variant.
    worker = await lib.createWorker(options);
    if (typeof worker.load === 'function') await worker.load();
    await worker.loadLanguage(OCR_LANGUAGES);
    await worker.initialize(OCR_LANGUAGES);
  }
  try {
    // OCR parameters: default to "block of text".
    await worker.setParameters({
      tessedit_pageseg_mode: '6', // PSM 6: a single uniform block of text
      preserve_interword_spaces: '1',
      user_defined_dpi: '300'
    });
  } catch (err) {
    // Do not leave a half-configured worker running when setup fails.
    if (typeof worker.terminate === 'function') {
      try {
        await worker.terminate();
      } catch (terminateErr) {
        console.warn('[LabelScan]', terminateErr);
      }
    }
    throw err;
  }
  return worker;
}

/**
 * Returns the shared Tesseract worker, creating it on first use.
 *
 * Concurrent callers share the same pending promise. A failed attempt clears
 * the cache, so the next call starts a fresh attempt.
 *
 * @returns {Promise<object>} Promise for the initialised worker.
 */
function ensureWorker () {
  if (!workerPromise) {
    workerPromise = createOcrWorker().catch((err) => {
      workerPromise = null;
      throw err;
    });
  }
  return workerPromise;
}
// ========= BILD-VORVERARBEITUNG =========
/**
 * Draws an image onto a new canvas, shrinking it so that its longer side does
 * not exceed the given limit. Images are never enlarged.
 *
 * @param {{width: number, height: number}} img Loaded image (or other drawable).
 * @param {number} [maxSide] Maximum length of the longer side; any falsy value
 *   falls back to 1800.
 * @returns {HTMLCanvasElement} Canvas holding the (possibly downscaled) image.
 */
function toCanvasFromImage (img, maxSide) {
  const limit = maxSide || 1800;
  const longerSide = img.width > img.height ? img.width : img.height;
  const factor = Math.min(1, limit / longerSide);
  const targetW = Math.max(1, Math.round(img.width * factor));
  const targetH = Math.max(1, Math.round(img.height * factor));

  const canvas = document.createElement('canvas');
  canvas.width = targetW;
  canvas.height = targetH;

  const context = canvas.getContext('2d', { willReadFrequently: true });
  context.imageSmoothingEnabled = true;
  context.drawImage(img, 0, 0, targetW, targetH);
  return canvas;
}
/**
 * Turns the canvas content into a pure black/white image, in place.
 *
 * Steps: weighted grayscale conversion, contrast stretch with a mild gamma
 * curve, a light unsharp mask based on a 3x3 box blur, and finally a global
 * threshold slightly below the mean brightness.
 *
 * @param {HTMLCanvasElement} canvas Canvas to process; its pixels are overwritten.
 * @returns {HTMLCanvasElement} The same canvas.
 */
function grayscaleContrastUnsharp (canvas) {
  const context = canvas.getContext('2d', { willReadFrequently: true });
  const cols = canvas.width;
  const rows = canvas.height;
  const frame = context.getImageData(0, 0, cols, rows);
  const rgba = frame.data;

  // 1a) Luminance per pixel (weighted RGB sum), tracking darkest/brightest value.
  const luma = new Uint8ClampedArray(cols * rows);
  let darkest = 255;
  let brightest = 0;
  for (let byte = 0, px = 0; byte < rgba.length; byte += 4, px += 1) {
    const lum = 0.2126 * rgba[byte] + 0.7152 * rgba[byte + 1] + 0.0722 * rgba[byte + 2];
    luma[px] = lum;
    if (lum < darkest) darkest = lum;
    if (lum > brightest) brightest = lum;
  }

  // 1b) Stretch to the full 0..255 range and apply a slight gamma correction.
  const range = Math.max(1, brightest - darkest);
  for (let px = 0; px < luma.length; px += 1) {
    const normalized = (luma[px] - darkest) / range; // 0..1
    const curved = Math.pow(normalized, 0.9);
    luma[px] = Math.max(0, Math.min(255, Math.round(curved * 255)));
  }

  // 2) Light unsharp mask: 3x3 box blur of the interior pixels, then
  //    original + amount * (original - blur). Border pixels keep a blur value
  //    of 0 and therefore fall back to their own value below.
  const blurred = new Uint8ClampedArray(luma.length);
  for (let y = 1; y < rows - 1; y += 1) {
    for (let x = 1; x < cols - 1; x += 1) {
      const center = y * cols + x;
      let total = 0;
      for (let dy = -1; dy <= 1; dy += 1) {
        for (let dx = -1; dx <= 1; dx += 1) {
          total += luma[center + dy * cols + dx];
        }
      }
      blurred[center] = total / 9;
    }
  }
  const amount = 0.65;
  for (let px = 0; px < luma.length; px += 1) {
    const original = luma[px];
    const reference = blurred[px] || original;
    const sharpened = original + amount * (original - reference);
    luma[px] = sharpened < 0 ? 0 : sharpened > 255 ? 255 : sharpened;
  }

  // 3) Global threshold at 97 % of the mean brightness -> binary image.
  let brightnessSum = 0;
  for (let px = 0; px < luma.length; px += 1) {
    brightnessSum += luma[px];
  }
  const cutoff = (brightnessSum / luma.length) * 0.97;
  for (let byte = 0, px = 0; byte < rgba.length; byte += 4, px += 1) {
    const bw = luma[px] < cutoff ? 0 : 255;
    rgba[byte] = bw;
    rgba[byte + 1] = bw;
    rgba[byte + 2] = bw;
    rgba[byte + 3] = 255; // fully opaque
  }

  context.putImageData(frame, 0, 0);
  return canvas;
}
/**
 * Returns a new canvas containing the source rotated around its centre.
 *
 * Width and height of the result are swapped whenever the angle is not a
 * multiple of 180 degrees.
 *
 * @param {HTMLCanvasElement} src Source canvas.
 * @param {number} deg Rotation angle in degrees.
 * @returns {HTMLCanvasElement} The rotated copy.
 */
function rotateCanvas (src, deg) {
  const { width: srcW, height: srcH } = src;
  const swapsSides = deg % 180 !== 0;

  const rotated = document.createElement('canvas');
  rotated.width = swapsSides ? srcH : srcW;
  rotated.height = swapsSides ? srcW : srcH;

  const context = rotated.getContext('2d');
  // Rotate around the centre of the target, then draw the source centred on it.
  context.translate(rotated.width / 2, rotated.height / 2);
  context.rotate(deg * Math.PI / 180);
  context.drawImage(src, -srcW / 2, -srcH / 2);
  return rotated;
}
/**
 * Copies a rectangular region of the source into a new canvas of that size.
 *
 * @param {HTMLCanvasElement} src Source canvas.
 * @param {number} x Left edge of the region.
 * @param {number} y Top edge of the region.
 * @param {number} w Region width (also the width of the result).
 * @param {number} h Region height (also the height of the result).
 * @returns {HTMLCanvasElement} Canvas containing only the requested region.
 */
function cropCanvas (src, x, y, w, h) {
  const region = document.createElement('canvas');
  region.width = w;
  region.height = h;
  const context = region.getContext('2d');
  context.drawImage(src, x, y, w, h, 0, 0, w, h);
  return region;
}
/**
 * Builds the list of pre-processed images that are handed to the OCR engine.
 *
 * For each rotation (0, +90, -90 degrees) three candidates are produced: the
 * full frame, a central region (80 % of the width, 70 % of the height) and the
 * bottom band (38 % of the height, where many labels carry their text block).
 *
 * Every candidate is a pixel copy. The full frame used to be duplicated with
 * cloneNode(), which does not copy a canvas bitmap and therefore produced a
 * blank candidate; it is now copied through cropCanvas like the other regions,
 * which also keeps the in-place binarisation away from the rotated source.
 *
 * @param {HTMLCanvasElement} base Canvas with the (downscaled) photo.
 * @returns {HTMLCanvasElement[]} Nine binarised candidates, grouped by rotation.
 */
function buildCandidates (base) {
  const candidates = [];
  for (const deg of [0, 90, -90]) {
    const rotated = deg ? rotateCanvas(base, deg) : base;
    const w = rotated.width;
    const h = rotated.height;

    // Full frame.
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, 0, 0, w, h)));

    // Central region.
    const cw = Math.round(w * 0.8);
    const ch = Math.round(h * 0.7);
    const cx = Math.round((w - cw) / 2);
    const cy = Math.round((h - ch) / 2);
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, cx, cy, cw, ch)));

    // Bottom band.
    const bh = Math.round(h * 0.38);
    candidates.push(grayscaleContrastUnsharp(cropCanvas(rotated, 0, h - bh, w, bh)));
  }
  return candidates;
}
// ========= OCR PIPELINE =========
/**
 * Loads the file into an Image element.
 *
 * The temporary object URL is revoked as soon as loading succeeds or fails.
 *
 * @param {Blob} file Image file.
 * @returns {Promise<HTMLImageElement>} The loaded image.
 */
function loadImageFromFile (file) {
  return new Promise((resolve, reject) => {
    const url = URL.createObjectURL(file);
    const img = new Image();
    img.onload = () => {
      URL.revokeObjectURL(url);
      resolve(img);
    };
    img.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error('Bild konnte nicht geladen werden'));
    };
    img.src = url;
  });
}

/**
 * Runs the worker over all candidates with two page segmentation modes
 * (6 = block of text, 7 = single text line) and keeps the best result.
 *
 * A result is scored as confidence + 1.5 * number of letter groups (two or
 * more letters). The whole search stops as soon as one result is very good.
 *
 * @param {object} worker Initialised Tesseract worker.
 * @param {Array} candidates Images to recognise.
 * @returns {Promise<{text: string, conf: number, letters: number}>} Best result.
 */
async function findBestOcrResult (worker, candidates) {
  const PSMs = ['6', '7'];
  let best = { text: '', conf: 0, letters: 0 };
  for (const c of candidates) {
    for (const psm of PSMs) {
      try {
        await worker.setParameters({ tessedit_pageseg_mode: psm });
        const { data } = await worker.recognize(c);
        const text = data?.text ? String(data.text) : '';
        const conf = data?.confidence || 0;
        // Heuristic: does the text contain enough letter groups?
        const letters = (text.match(/[A-Za-zÄÖÜäöüß]{2,}/g) || []).length;
        if (conf + letters * 1.5 > best.conf + best.letters * 1.5) {
          best = { text, conf, letters };
        }
        // Very good result: stop scanning the remaining candidates as well.
        if (conf > 75 && letters > 15) return best;
      } catch (e) {
        // Best effort: note the failure and continue with the next attempt.
        console.warn('[LabelScan]', e);
      }
    }
  }
  return best;
}

/**
 * Runs the full OCR pipeline for one photo: load, downscale, build candidate
 * images, recognise, and publish the recognised text in #ados-scan-ocr.
 *
 * The progress bar is always hidden again, even when a step throws.
 *
 * @param {Blob} file The selected image file.
 * @returns {Promise<string>} Recognised text, or '' when nothing was found.
 */
async function runOCR (file) {
  const worker = await ensureWorker();
  setProgress(0);
  let best;
  try {
    // Load image -> canvas -> candidates.
    const img = await loadImageFromFile(file);
    const base = toCanvasFromImage(img, 1800);
    best = await findBestOcrResult(worker, buildCandidates(base));
  } finally {
    setProgress(null);
  }
  // Debug output of the raw OCR text.
  const dbgEl = document.getElementById('ados-scan-ocr');
  if (dbgEl) dbgEl.textContent = best.text || '(kein Text erkannt)';
  return best.text || '';
}
// ========= HINWEISE EXTRAHIEREN & SUCHE =========
/**
 * Extracts search hints from raw OCR text.
 *
 * @param {string} text OCR output (null/undefined is treated as '').
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names – entries of KNOWN_TOKENS found in the text (case-insensitive, whole word);
 *   ages  – unique age statements such as "12" from "12 Years" / "12 yo" / "12 Jahre";
 *   years – unique four-digit years starting with 19 or 20;
 *   words – up to 8 unique capitalised words of at least four letters;
 *   raw   – the text with whitespace collapsed and trimmed.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  const names = KNOWN_TOKENS.filter((token) => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  // A Set keeps first-seen order, so results stay in reading order.
  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  const ages = [...new Set(Array.from(raw.matchAll(ageRe), (m) => m[1]))];

  const yearRe = /\b(19|20)\d{2}\b/g;
  const years = [...new Set(Array.from(raw.matchAll(yearRe), (m) => m[0]))];

  // \b only knows ASCII word characters, so it cannot detect a boundary next to
  // Ä/Ö/Ü/ä/ö/ü/ß. Explicit lookarounds are used instead so that words such as
  // "Österreich" or "Gruß" are recognised. A word must start with a capital
  // letter, may contain hyphens, and must end with a letter.
  const wordRe = /(?<![A-Za-zÄÖÜäöüß0-9_])[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{2,}[A-Za-zÄÖÜäöüß](?![A-Za-zÄÖÜäöüß0-9_])/g;
  const words = [...new Set(Array.from(raw.matchAll(wordRe), (m) => m[0]))].slice(0, 8);

  return { names, ages, years, words, raw };
}
/**
 * Searches the wiki (main namespace) for pages matching the extracted hints.
 *
 * Queries are tried from narrow to broad and run one after another; the search
 * stops as soon as enough unique titles were collected. If there are still too
 * few hits, a prefix search on the first name (or first word) is appended.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 *   Hints as produced by extractHints.
 * @param {number} [limit] Maximum number of results; any falsy value means 12.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Unique hits in
 *   the order they were found, at most `limit` entries.
 */
async function searchWikiSmart (hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const MAIN_NAMESPACE = 0;
  const maxHits = limit || 12;
  const { names, ages, years, words } = hints;

  // NOTE(review): the category terms are joined with spaces; the search backend
  // presumably treats that as "in all of these categories" – verify that this
  // is the intended filter.
  const hasCategories = ADOS_CATEGORIES.length > 0;
  const categoryFilter = hasCategories
    ? ADOS_CATEGORIES.map((cat) => 'incategory:"' + cat + '"').join(' ')
    : '';
  const withCategories = (query) => `${query} ${categoryFilter}`.trim();

  const queries = [];

  // Pass 1: very narrow (title match plus categories).
  for (const name of names) {
    for (const age of ages) {
      queries.push(withCategories(`intitle:"${name}" intitle:${age}`));
    }
    for (const year of years) {
      queries.push(withCategories(`intitle:"${name}" "${year}"`));
    }
    queries.push(withCategories(`intitle:"${name}"`));
  }

  // Pass 2: bundle of quoted keywords.
  const keywordQuery = [
    ...names.slice(0, 2),
    ...ages.slice(0, 1),
    ...years.slice(0, 1),
    ...words.slice(0, 3)
  ].map((term) => `"${term}"`).join(' ');
  if (keywordQuery) {
    queries.push(withCategories(keywordQuery));
  }

  // Pass 3: the same without categories (broader fallback).
  if (hasCategories) {
    for (const name of names) {
      queries.push(`intitle:"${name}"`);
    }
    if (keywordQuery) {
      queries.push(keywordQuery);
    }
  }

  // Pass 4: the bare first name, or else the first word.
  if (names.length) {
    queries.push(names[0]);
  } else if (words.length) {
    queries.push(words[0]);
  }

  const knownTitles = new Set();
  const hits = [];

  for (const query of queries) {
    if (query) {
      const response = await api.get({
        action: 'query',
        list: 'search',
        srsearch: query,
        srnamespace: MAIN_NAMESPACE,
        srlimit: maxHits,
        formatversion: 2
      });
      for (const item of response.query?.search || []) {
        if (!knownTitles.has(item.title)) {
          knownTitles.add(item.title);
          hits.push(item);
        }
      }
    }
    if (hits.length >= maxHits) break;
  }

  // Prefix-search fallback when the full-text queries did not fill the list.
  if (hits.length < maxHits) {
    const prefix = hints.names[0] || hints.words[0] || '';
    if (prefix) {
      const response = await api.get({
        action: 'query',
        list: 'prefixsearch',
        pssearch: prefix,
        psnamespace: MAIN_NAMESPACE,
        pslimit: maxHits
      });
      for (const item of response.query?.prefixsearch || []) {
        const title = item.title || item['*'];
        if (!knownTitles.has(title)) {
          knownTitles.add(title);
          hits.push({ title, snippet: '' });
        }
      }
    }
  }

  return hits.slice(0, maxHits);
}
/**
 * HTML-escapes a value via mw.html.escape.
 *
 * @param {*} s Value to escape; any falsy value is treated as the empty string.
 * @returns {string} The escaped text.
 */
function esc (s) {
  const text = String(s || '');
  return mw.html.escape(text);
}
/**
 * Renders the search hits into #ados-scan-results.
 *
 * All nodes are built through the DOM API and filled via textContent, so
 * neither titles nor snippets are ever interpreted as markup by the page.
 * Search snippets arrive as HTML (highlight spans, entities); they are parsed
 * in a separate inert document and only their text is shown.
 *
 * @param {Array<{title: string, snippet?: string}>} items Hits to display;
 *   at most the first 12 are rendered. Empty/missing input shows a hint text.
 */
function renderResults (items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = ''; // remove previous results

  if (!items || !items.length) {
    const empty = document.createElement('div');
    empty.className = 'ados-hit';
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }

  const parser = new DOMParser();
  for (const it of items.slice(0, 12)) {
    const title = it.title || '';

    const anchor = document.createElement('a');
    anchor.href = mw.util.getUrl(title.replace(/ /g, '_'));
    anchor.textContent = String(title);
    const bold = document.createElement('b');
    bold.appendChild(anchor);

    const hit = document.createElement('div');
    hit.className = 'ados-hit';
    hit.appendChild(bold);

    // Reduce the HTML snippet to plain text (drops highlight tags, decodes entities).
    const snippetText = it.snippet
      ? parser.parseFromString(String(it.snippet), 'text/html').body.textContent
      : '';
    if (snippetText) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snippetText;
      hit.appendChild(meta);
    }

    box.appendChild(hit);
  }
}
// ========= EVENT-BINDING =========
// Set to true once the event handlers have been attached (guards against double binding).
let BOUND = false;

/**
 * Attaches the gadget's event handlers to the scan UI, at most once.
 *
 * Does nothing while the UI is not yet in the document. Wires up:
 *  - the optional big button, which opens the file picker;
 *  - the file input, which shows a preview of the chosen photo;
 *  - the run button, which performs OCR, searches the wiki and renders the hits.
 */
function bind () {
  if (BOUND) return;
  if (!hasUI()) return;

  const runBtn = document.getElementById('ados-scan-run');
  const bigBtn = document.getElementById('ados-scan-bigbtn');
  const fileIn = document.getElementById('ados-scan-file');
  if (!runBtn || !fileIn) return;
  BOUND = true;

  if (bigBtn) {
    bigBtn.addEventListener('click', () => fileIn.click());
  }

  fileIn.addEventListener('change', function () {
    if (this.files && this.files[0]) {
      showPreview(this.files[0]);
    }
  });

  runBtn.addEventListener('click', async function (ev) {
    ev.preventDefault();
    const hasFile = fileIn.files && fileIn.files[0];
    if (!hasFile) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      // Lock the button while the pipeline is running.
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Vorverarbeitung & Texterkennung …');
      const text = await runOCR(fileIn.files[0]);
      setStatus('Suche im Wiki …');
      const hints = extractHints(text);
      const hits = await searchWikiSmart(hints, 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  });
}
// Bind as soon as the DOM is ready, with two delayed retries for UI that is
// inserted shortly after load.
if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bind);
else bind();
setTimeout(bind, 250);
setTimeout(bind, 1000);
// Keep watching for the scan UI only while it is not bound yet. The observer is
// disconnected after a successful bind so that it does not keep firing for
// every DOM mutation for the rest of the page's lifetime.
if (!BOUND) {
  const uiObserver = new MutationObserver(() => {
    bind();
    if (BOUND) uiObserver.disconnect();
  });
  uiObserver.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();