/*
 * MediaWiki:Gadget-LabelScan.js
 * Appearance
 * Note: After publishing, clear your browser cache to see the changes.
 * - Firefox/Safari: hold Shift and click Reload, or press either Ctrl+F5 or Ctrl+R (⌘+R on a Mac)
 * - Google Chrome: press Shift+Ctrl+R (⌘+Shift+R on a Mac)
 * - Edge: press Ctrl+F5, or hold Ctrl and click Reload
 */
/* global mw, Tesseract */
(function () {
'use strict';
// ========================================================================
// KONFIG
// ========================================================================
// Wiki categories the search passes are restricted to (exact category names).
const ADOS_CATEGORIES = [
  'Alle A Dream of Scotland Abfüllungen',
  'Alle A Dream of Ireland Abfüllungen',
  'Alle A Dream of... – Der Rest der Welt Abfüllungen',
  'Friendly Mr. Z Whiskytainment Abfüllungen',
  'Die Whisky Elfen Abfüllungen',
  'The Fine Art of Whisky Abfüllungen',
  'Alle Rumbastic Abfüllungen',
];
// Frequently seen distillery, region and series names. When one of them shows
// up in the OCR text it is used as an anchor for the wiki search.
const KNOWN_TOKENS = [
  // Distilleries
  'Ardbeg', 'Ardmore', 'Arran', 'Auchroisk', 'Ben Nevis', 'Blair Athol', 'Bowmore',
  'Caol Ila', 'Clynelish', 'Glenallachie', 'Glenrothes', 'Longmorn', 'Lagavulin',
  'Tullibardine', 'Dalmore', 'Benrinnes', 'Mortlach', 'Glenlivet', 'Inchgower',
  // Regions
  'Islay', 'Speyside', 'Highland', 'Lowland', 'Campbeltown', 'Ireland',
  // Series / bottlers
  'A Dream of Scotland', 'A Dream of Ireland', 'The Fine Art of Whisky',
  'The Tasteful 8', 'Friendly Mr. Z', 'Die Whisky Elfen', 'Rumbastic',
];
// Debug switch: when truthy, the raw OCR text is written to the debug box.
// A value set before this script runs is kept; otherwise it defaults to false.
if (!window.ADOS_SCAN_DEBUG) {
  window.ADOS_SCAN_DEBUG = false;
}
// ========================================================================
// DOM-Helfer
// ========================================================================
/** Shorthand for document.getElementById; returns the element or null. */
function byId(id){
  const node = document.getElementById(id);
  return node;
}
/** True when both the run button and the file input exist in the page. */
function hasUI(){
  const required = ['ados-scan-run', 'ados-scan-file'];
  return required.every(id => Boolean(byId(id)));
}
/** Writes a status message into #ados-scan-status; a falsy value clears it. */
function setStatus(t){
  const statusEl = byId('ados-scan-status');
  if (!statusEl) return;
  statusEl.textContent = t ? t : '';
}
/**
 * Updates the #ados-scan-progress element.
 * @param {?number} p Fraction in [0, 1] (values outside are clamped);
 *   null/undefined hides the bar and resets it to 0.
 */
function setProgress(p){
  const meter = byId('ados-scan-progress');
  if (!meter) return;
  if (p === null || p === undefined){
    meter.hidden = true;
    meter.value = 0;
    return;
  }
  meter.hidden = false;
  const capped = Math.min(1, p);
  meter.value = Math.max(0, capped);
}
/**
 * Shows a preview of the selected image inside #ados-scan-preview.
 *
 * The blob URL is only created when the preview container exists and is
 * released again as soon as the image has loaded (or failed to load), so
 * repeated selections do not accumulate object URLs.
 *
 * @param {Blob} file Image file chosen or dropped by the user.
 */
function showPreview(file){
  const prev = byId('ados-scan-preview');
  if (!prev) return;
  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  const release = () => URL.revokeObjectURL(url);
  img.alt = 'Vorschau';
  // An already decoded image stays visible after its blob URL is revoked.
  img.onload = release;
  img.onerror = release;
  img.src = url;
  prev.innerHTML = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden','false');
}
// ========================================================================
// TESSERACT sauber als WORKER laden (deu+eng)
// ========================================================================
// Cached promise for the shared Tesseract worker; reset to null when setup fails.
let _ocrWorkerPromise = null;
/**
 * Lazily loads tesseract.js (jsDelivr first, unpkg as fallback) and creates one
 * shared worker for German + English.
 *
 * Uses the tesseract.js v5 calling convention that matches the pinned "@5"
 * script: languages and engine mode are passed to createWorker(), and the
 * former loadLanguage()/initialize() steps are no longer needed.
 *
 * A failed setup is not cached, so a later call starts a fresh attempt.
 *
 * @returns {Promise<Object>} Resolves with the ready-to-use worker.
 */
function getOcrWorker(){
  if (_ocrWorkerPromise) return _ocrWorkerPromise;
  const SCRIPT_URLS = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
  ];
  // Injects one <script> tag and settles when it has loaded or failed.
  const loadScript = (src) => new Promise((resolve, reject) => {
    const el = document.createElement('script');
    el.src = src;
    el.async = true;
    el.onload = resolve;
    el.onerror = () => reject(new Error('Failed to load ' + src));
    document.head.appendChild(el);
  });
  const pending = (async () => {
    if (!window.Tesseract){
      try {
        await loadScript(SCRIPT_URLS[0]);
      } catch (primaryError){
        await loadScript(SCRIPT_URLS[1]);
      }
    }
    const { createWorker } = window.Tesseract;
    // v5 signature: createWorker(langs, oem, options); OEM 1 = LSTM only.
    const worker = await createWorker('deu+eng', 1, {
      workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
      corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js',
      langPath: 'https://tessdata.projectnaptha.com/5',
      logger: m => {
        // Map recognition progress onto the 5%..95% range of the bar.
        if (m && m.status === 'recognizing text' && typeof m.progress === 'number'){
          setProgress(0.05 + m.progress * 0.9);
        }
      }
    });
    try {
      await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' });
    } catch (err){
      // Do not leave a half-configured worker running in the background.
      if (typeof worker.terminate === 'function') worker.terminate();
      throw err;
    }
    return worker;
  })();
  _ocrWorkerPromise = pending;
  // Forget a failed attempt so that "please retry" can actually succeed.
  pending.catch(() => {
    if (_ocrWorkerPromise === pending) _ocrWorkerPromise = null;
  });
  return pending;
}
// ========================================================================
// Bild-Vorverarbeitung: Skalierung + adaptives Thresholding
// ========================================================================
/**
 * Draws an image onto a new canvas, shrinking it so that its longer side is
 * at most maxSide pixels. Images that are already small enough keep their size.
 *
 * @param {Object} img Drawable with numeric width/height.
 * @param {number} [maxSide=2000] Upper bound for the longer side.
 * @returns {HTMLCanvasElement} Canvas holding the (possibly downscaled) image.
 */
function scaleToCanvas(img, maxSide = 2000){
  const longest = Math.max(img.width, img.height);
  const factor = Math.min(1, maxSide / longest);
  const targetW = Math.round(img.width * factor);
  const targetH = Math.round(img.height * factor);
  const canvas = document.createElement('canvas');
  canvas.width = targetW;
  canvas.height = targetH;
  const context = canvas.getContext('2d');
  context.imageSmoothingEnabled = true;
  context.drawImage(img, 0, 0, targetW, targetH);
  return canvas;
}
/**
 * Binarises a canvas with a local-mean threshold.
 *
 * Each pixel is converted to a luminance value and compared with the mean
 * luminance of a square window around it (clipped at the image border). The
 * window means are read from a summed-area table, so every pixel needs four
 * table lookups.
 *
 * @param {HTMLCanvasElement} src Source canvas.
 * @returns {HTMLCanvasElement} New canvas of equal size with pure black/white pixels.
 */
function adaptiveThreshold(src){
  const width = src.width;
  const height = src.height;
  const stride = width + 1; // row length of the summed-area table

  const target = document.createElement('canvas');
  target.width = width;
  target.height = height;
  const srcCtx = src.getContext('2d');
  const dstCtx = target.getContext('2d');

  // RGBA -> luminance, truncated to an integer.
  const rgba = srcCtx.getImageData(0, 0, width, height).data;
  const luma = new Uint8ClampedArray(width * height);
  for (let px = 0, off = 0; off < rgba.length; px++, off += 4){
    luma[px] = (0.2126*rgba[off] + 0.7152*rgba[off+1] + 0.0722*rgba[off+2]) | 0;
  }

  // Summed-area table with one extra zero row and column at the top/left.
  const integral = new Uint32Array(stride * (height + 1));
  for (let row = 0; row < height; row++){
    const above = row * stride;
    const here = above + stride;
    let running = 0;
    for (let col = 0; col < width; col++){
      running += luma[row * width + col];
      integral[here + col + 1] = integral[above + col + 1] + running;
    }
  }

  const radius = Math.max(15, Math.round(Math.min(width, height) / 24));
  const OFFSET = 7; // a pixel must be this much darker than its surroundings to turn black
  const result = dstCtx.createImageData(width, height);
  const pixels = result.data;

  for (let y = 0; y < height; y++){
    const top = Math.max(0, y - radius);
    const bottom = Math.min(height - 1, y + radius);
    const topRow = top * stride;
    const bottomRow = (bottom + 1) * stride;
    for (let x = 0; x < width; x++){
      const left = Math.max(0, x - radius);
      const right = Math.min(width - 1, x + radius);
      const windowSum = integral[bottomRow + right + 1] + integral[topRow + left]
        - integral[bottomRow + left] - integral[topRow + right + 1];
      const area = (right - left + 1) * (bottom - top + 1);
      const mean = windowSum / area;
      const idx = y * width + x;
      const value = luma[idx] < (mean - OFFSET) ? 0 : 255;
      const o = idx * 4;
      pixels[o] = value;
      pixels[o + 1] = value;
      pixels[o + 2] = value;
      pixels[o + 3] = 255;
    }
  }

  dstCtx.putImageData(result, 0, 0);
  return target;
}
// ========================================================================
// OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig)
// ========================================================================
/**
 * Runs OCR on an image file and returns the longest text found.
 *
 * The image is downscaled and binarised, then recognised with page
 * segmentation modes 11, 6 and 4 on both the binarised and the original
 * canvas. The loop stops early once more than 40 characters were recognised.
 *
 * The temporary blob URL is always released and the progress bar is always
 * hidden again, also when decoding or recognition fails.
 *
 * @param {Blob} file Image file to analyse.
 * @returns {Promise<string>} Trimmed OCR text (possibly empty).
 */
async function runOCR(file){
  setProgress(0.02);
  const url = URL.createObjectURL(file);
  try {
    const img = await new Promise((resolve, reject) => {
      const el = new Image();
      el.onload = () => resolve(el);
      el.onerror = () => reject(new Error('Image could not be loaded'));
      el.src = url;
    });
    const base = scaleToCanvas(img, 2000);
    const bin = adaptiveThreshold(base);
    const worker = await getOcrWorker();
    const candidates = [
      { canvas: bin, psm: '11' },
      { canvas: base, psm: '11' },
      { canvas: bin, psm: '6' },
      { canvas: base, psm: '6' },
      { canvas: bin, psm: '4' },
      { canvas: base, psm: '4' }
    ];
    let best = '';
    for (let i = 0; i < candidates.length; i++){
      const c = candidates[i];
      await worker.setParameters({ tessedit_pageseg_mode: c.psm });
      const { data } = await worker.recognize(c.canvas);
      const txt = (data && data.text ? data.text : '').trim();
      if (txt.length > best.length) best = txt;
      // Good enough: skip the remaining, slower strategies.
      if (best.length > 40) break;
      setProgress(0.96 + i * 0.008);
    }
    if (window.ADOS_SCAN_DEBUG){
      const box = byId('ados-scan-ocr');
      if (box) box.textContent = best || '(leer)';
    }
    return best;
  } finally {
    URL.revokeObjectURL(url);
    setProgress(null);
  }
}
// ========================================================================
// Hinweise aus OCR destillieren
// ========================================================================
/**
 * Distils search hints from raw OCR text.
 *
 * @param {string} text Raw OCR output (null/undefined is treated as empty).
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names - entries of KNOWN_TOKENS found in the text (case-insensitive);
 *   ages  - unique age statements such as "12" from "12 Years" / "12 Jahre";
 *   years - unique four-digit years starting with 19 or 20;
 *   words - up to 8 unique capitalised words of at least 4 characters;
 *   raw   - the whitespace-normalised input.
 */
function extractHints(text){
  const raw = String(text||'').replace(/\s+/g,' ').trim();

  const names = KNOWN_TOKENS.filter(token => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g,'\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  // Collects unique values of one capture group, in order of appearance.
  const collectUnique = (re, group, max = Infinity) => {
    const found = [];
    let m;
    while (found.length < max && (m = re.exec(raw)) !== null){
      if (!found.includes(m[group])) found.push(m[group]);
    }
    return found;
  };

  const ages = collectUnique(/\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi, 1);
  const years = collectUnique(/\b(19|20)\d{2}\b/g, 0);

  // \b only knows ASCII word characters, so it cannot delimit words that start
  // or end with an umlaut/ß. The boundaries are spelled out explicitly instead:
  // no letter/digit/underscore directly before or after the word, and the word
  // itself has to end in a letter (a trailing hyphen is not part of it).
  const wordRe = /(?:^|[^A-Za-z0-9_ÄÖÜäöüß])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß-]{2,}[A-Za-zÄÖÜäöüß])(?![A-Za-z0-9_ÄÖÜäöüß])/g;
  const words = collectUnique(wordRe, 1, 8);

  return { names, ages, years, words, raw };
}
// ========================================================================
// Suche im Wiki (3 Pässe)
// ========================================================================
/**
 * Searches the wiki for pages matching the OCR hints, in three passes:
 *   1. full-text search with intitle: for every known name, combined with
 *      age / year where available, restricted to the configured categories;
 *   2. one combined phrase query (names, age, year, first words), same categories;
 *   3. prefix search on the first name (or first word) without category filter.
 * Results are de-duplicated by title; searching stops once `limit` hits exist.
 *
 * The category restriction is one incategory: keyword with pipe-separated
 * names, which matches pages in ANY of the categories. Separate incategory:
 * keywords would all have to match at once, which no page does.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 * @param {number} [limit=12] Maximum number of results.
 * @returns {Promise<Array<{title: string, snippet: string}>>} Search hits.
 */
async function searchWikiSmart(hints, limit){
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const MAX = limit || 12;
  const ns0 = 0;

  const categoryFilter = ADOS_CATEGORIES.length
    ? 'incategory:"' + ADOS_CATEGORIES.join('|') + '"'
    : '';
  // Joins query fragments and appends the category filter when there is one.
  const query = (...parts) => parts.concat(categoryFilter).filter(Boolean).join(' ');

  const pass1 = [];
  hints.names.forEach(n => {
    hints.ages.forEach(a => pass1.push(query(`intitle:"${n}"`, `intitle:${a}`)));
    hints.years.forEach(y => pass1.push(query(`intitle:"${n}"`, `"${y}"`)));
    pass1.push(query(`intitle:"${n}"`));
  });

  const key = [].concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
    .map(x => `"${x}"`).join(' ');
  const pass2 = key ? [ query(key) ] : [];

  const pass3 = [];
  if (hints.names.length) pass3.push(hints.names[0]);
  if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);

  const seen = new Set();
  const out = [];
  // Runs one full-text search and appends hits that were not seen before.
  async function runSr(q){
    const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 });
    (r.query?.search || []).forEach(it => {
      if (seen.has(it.title)) return;
      seen.add(it.title);
      out.push(it);
    });
  }

  for (const q of pass1.concat(pass2)){
    await runSr(q);
    if (out.length >= MAX) return out.slice(0, MAX);
  }
  for (const p of pass3){
    const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace:ns0, pslimit:MAX });
    (r.query?.prefixsearch || []).forEach(it => {
      // Without formatversion=2 some result formats carry the title under '*'.
      const title = it.title || it['*'];
      if (seen.has(title)) return;
      seen.add(title);
      out.push({ title, snippet:'' });
    });
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
// ========================================================================
// Treffer rendern
// ========================================================================
/** HTML-escapes a value via mw.html.escape; falsy input becomes an empty string. */
function esc(s){
  const text = s ? String(s) : '';
  return mw.html.escape(text);
}
/**
 * Renders up to 12 search hits into #ados-scan-results, or a "no hits"
 * message when the list is empty.
 *
 * @param {Array<{title: string, snippet: (string|undefined)}>} items Search hits.
 */
function renderResults(items){
  const box = byId('ados-scan-results');
  if (!box) return;
  box.innerHTML = '';
  if (!items || !items.length){
    box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
    return;
  }
  items.slice(0, 12).forEach(it => {
    const title = it.title || '';
    const link = mw.util.getUrl(title.replace(/ /g,'_'));
    // The snippet is HTML produced by the search API; only the highlight
    // <span> wrappers are removed, the rest is inserted as delivered.
    const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '');
    const div = document.createElement('div');
    div.className = 'ados-hit';
    // Escape the URL as well: it ends up inside a double-quoted attribute.
    div.innerHTML = '<b><a href="' + esc(link) + '">' + esc(title) + '</a></b>'
      + (snip ? '<div class="meta">' + snip + '</div>' : '');
    box.appendChild(div);
  });
}
// ========================================================================
// BINDING
// ========================================================================
// Set once the event handlers have been attached; prevents double binding.
let BOUND=false;
/**
 * Wires up the label-scan UI: file picker, preview, drag & drop and the
 * "recognise & search" button. Does nothing when the UI is not (yet) in the
 * page or when the handlers were already attached.
 */
function bind(){
  if (BOUND) return;
  if (!hasUI()) return;

  const runBtn = byId('ados-scan-run');
  const fileIn = byId('ados-scan-file');
  const bigBtn = byId('ados-scan-bigbtn');
  const drop = byId('ados-scan-drop');
  if (!runBtn || !fileIn) return;
  // The marker on the element guards against a second copy of this script.
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  if (bigBtn){
    bigBtn.addEventListener('click', () => fileIn.click());
  }

  fileIn.addEventListener('change', () => {
    const picked = fileIn.files && fileIn.files[0];
    if (picked) showPreview(picked);
  });

  // Drag & drop
  if (drop){
    const markDrag = e => { e.preventDefault(); drop.classList.add('is-drag'); };
    const clearDrag = e => { e.preventDefault(); drop.classList.remove('is-drag'); };
    drop.addEventListener('dragenter', markDrag);
    drop.addEventListener('dragover', markDrag);
    drop.addEventListener('dragleave', clearDrag);
    drop.addEventListener('drop', clearDrag);
    drop.addEventListener('drop', e => {
      const transfer = e.dataTransfer;
      if (!transfer || !transfer.files) return;
      const first = transfer.files[0];
      if (!first) return;
      fileIn.files = transfer.files;
      showPreview(first);
    });
  }

  // Click on "recognise & search": OCR -> hints -> wiki search -> render.
  async function onRunClick(ev){
    ev.preventDefault();
    const chosen = fileIn.files && fileIn.files[0];
    if (!chosen){
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(chosen);
      setStatus('Suche im Wiki …');
      const hits = await searchWikiSmart(extractHints(text), 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (err){
      console.error('[LabelScan]', err);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  }
  runBtn.addEventListener('click', onRunClick);

  // Keep the button clickable even if another element overlaps it.
  const wrap = byId('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  Object.assign(runBtn.style, { position: 'relative', zIndex: '9999', pointerEvents: 'auto' });
}
// Initial binding plus fallbacks: bind right away (or on DOMContentLoaded),
// retry after short delays, and watch the DOM for a UI that is inserted later.
if (document.readyState === 'loading'){ document.addEventListener('DOMContentLoaded', bind); } else { bind(); }
setTimeout(bind, 250); setTimeout(bind, 1000);
if (!BOUND){
  // The observer is only needed until the UI has been bound. It is disconnected
  // afterwards so the callback no longer runs on every DOM mutation of the page.
  const mo = new MutationObserver(() => {
    bind();
    if (BOUND) mo.disconnect();
  });
  mo.observe(document.documentElement || document.body, { childList:true, subtree:true });
}
})();