/*
 * MediaWiki:Gadget-LabelScan.js
 *
 * Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
 * - Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
 * - Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
 * - Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
 */
// Categories of the ADOS wiki used to scope the search queries.
// The titles must be spelled exactly as they appear in the wiki.
const ADOS_CATEGORIES = [
  'Alle A Dream of Scotland Abfüllungen',
  'Alle A Dream of Ireland Abfüllungen',
  'Alle A Dream of... – Der Rest der Welt Abfüllungen',
  'Friendly Mr. Z Whiskytainment Abfüllungen',
  'Die Whisky Elfen Abfüllungen',
  'The Fine Art of Whisky Abfüllungen',
  'Alle Rumbastic Abfüllungen',
];
// A handful of frequent distillery / brand / region tokens (extend as needed).
const KNOWN_TOKENS = [
  'Ardbeg',
  'Ardmore',
  'Arran',
  'Auchroisk',
  'Ben Nevis',
  'Blair Athol',
  'Bowmore',
  'Caol Ila',
  'Clynelish',
  'Glenallachie',
  'Glenrothes',
  'Longmorn',
  'Lagavulin',
  'Tullibardine',
  'Dalmore',
  'Benrinnes',
  'Mortlach',
  'Glenlivet',
  'Inchgower',
  'Islay',
  'Speyside',
  'Highland',
  'Lowland',
  'Campbeltown',
  'Ireland',
];
/**
 * Prepares an image for OCR by drawing it onto a canvas: the longer edge is
 * scaled to 1800 px (smaller images are enlarged) and every pixel is replaced
 * by a slightly contrast-boosted gray value.
 *
 * @param {Blob} file - Image file; handed to URL.createObjectURL.
 * @returns {Promise<HTMLCanvasElement>} Canvas with the processed image, meant
 *   to be passed to Tesseract instead of the original file.
 * @throws Rejects with the image's error event when the file cannot be loaded.
 */
async function preprocessImage(file) {
  const objectUrl = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((resolve, reject) => {
      const el = new Image();
      el.onload = () => resolve(el);
      el.onerror = reject;
      el.src = objectUrl;
    });
  } finally {
    // Once loading has settled the blob URL is no longer needed; release it
    // so repeated scans do not accumulate object URLs.
    URL.revokeObjectURL(objectUrl);
  }

  const MAX_EDGE = 1800; // target length of the longer edge
  const scale = MAX_EDGE / Math.max(img.width, img.height);
  const w = Math.round(img.width * scale);
  const h = Math.round(img.height * scale);

  const canvas = document.createElement('canvas');
  canvas.width = w;
  canvas.height = h;
  const ctx = canvas.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);

  // Grayscale (weighted RGB sum) plus a mild contrast boost around mid-gray.
  const imageData = ctx.getImageData(0, 0, w, h);
  const px = imageData.data;
  for (let i = 0; i < px.length; i += 4) {
    const gray = 0.2126 * px[i] + 0.7152 * px[i + 1] + 0.0722 * px[i + 2];
    const boosted = Math.max(0, Math.min(255, (gray - 128) * 1.15 + 128));
    px[i] = boosted;
    px[i + 1] = boosted;
    px[i + 2] = boosted;
    // px[i + 3] (alpha) is left untouched.
  }
  ctx.putImageData(imageData, 0, 0);
  return canvas;
}
/**
 * Runs OCR on a label photo: loads Tesseract on demand, preprocesses the
 * image onto a canvas and recognises German + English text.
 *
 * NOTE(review): ensureTesseract and setProgress are declared inside the IIFE
 * further down in this file, which also declares its own runOCR — confirm
 * that this top-level version is actually reachable.
 *
 * @param {Blob} file - Image file to recognise.
 * @returns {Promise<string>} Recognised text, or '' when the result has none.
 */
async function runOCR(file) {
  await ensureTesseract();
  setProgress(0);
  try {
    const canvas = await preprocessImage(file);
    const res = await Tesseract.recognize(canvas, 'deu+eng', {
      // PSM 6 ("a single block of text") is intended to be robust for labels.
      // NOTE(review): presumably Tesseract parameters have to be applied via
      // worker.setParameters() in tesseract.js v5 — verify these take effect.
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
      logger: (m) => {
        if (m?.status === 'recognizing text') setProgress(m.progress || 0);
      },
    });
    return (res && res.data && res.data.text) || '';
  } finally {
    // Always hide the progress bar, including when preprocessing or OCR fails.
    setProgress(null);
  }
}
/**
 * Derives search hints from OCR text.
 *
 * @param {string} text - Raw OCR output (null/undefined is treated as '').
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names: KNOWN_TOKENS entries found in the text (case-insensitive, whole words);
 *   ages: distinct age statements such as "12 years", "12 yo", "12-year-old",
 *         "14 Jahre", "14 Jahren" (digits only, as strings);
 *   years: distinct four-digit years starting with 19 or 20;
 *   words: up to 8 distinct capitalised words with at least 4 characters;
 *   raw: the text with whitespace runs collapsed to single spaces.
 */
function extractHints(text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();
  const distinct = (values) => [...new Set(values)];

  // Distillery / brand names: keep only the tokens that really occur.
  const names = KNOWN_TOKENS.filter((token) => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  // Age: the separator may be a space or a hyphen so that "12-year-old" matches.
  const ageRe = /\b([1-9]\d?)[\s-]?(?:years?|yo|jahr(?:e|en)?)\b/gi;
  const ages = distinct([...raw.matchAll(ageRe)].map((m) => m[1]));

  // Vintage / bottling year.
  const years = distinct(raw.match(/\b(?:19|20)\d{2}\b/g) || []);

  // A few "prominent" words: capitalised, more than 3 characters, at most 8.
  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const words = distinct(raw.match(wordRe) || []).slice(0, 8);

  return { names, ages, years, words, raw };
}
/**
 * Searches the wiki in up to three passes and merges the hits without
 * duplicates (by title), stopping as soon as enough hits were collected.
 *
 *   Pass 1: precise intitle: combinations of name with age / year.
 *   Pass 2: one full-text query built from the strongest hints.
 *   Pass 3: title prefix search on the first name (or the first word).
 *
 * NOTE(review): the queries rely on CirrusSearch keyword syntax (intitle:,
 * incategory:) — confirm the wiki runs CirrusSearch.
 *
 * @param {{names?: string[], ages?: string[], years?: string[], words?: string[]}} hints
 *   Hints as produced by extractHints; missing lists are treated as empty.
 * @param {number} [limit] - Maximum number of hits (defaults to 12).
 * @returns {Promise<Array<{title: string, snippet: string}>>} At most `limit` hits.
 */
async function searchWikiSmart(hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const NS_MAIN = 0;
  const MAX = limit || 12;

  // Tolerate hint objects that lack some lists (or no hints at all).
  const listOf = (value) => (Array.isArray(value) ? value : []);
  const source = hints || {};
  const names = listOf(source.names);
  const ages = listOf(source.ages);
  const years = listOf(source.years);
  const words = listOf(source.words);

  // One incategory: filter with '|' matches pages in ANY of the categories.
  // Several space-separated incategory: filters would be ANDed and require
  // a page to be in all of them at once.
  const incat = ADOS_CATEGORIES.length ? `incategory:"${ADOS_CATEGORIES.join('|')}"` : '';
  const scoped = (query) => (incat ? `${query} ${incat}` : query);

  // --- Pass 1: intitle combinations (very precise)
  const pass1 = [];
  for (const name of names) {
    for (const age of ages) pass1.push(scoped(`intitle:"${name}" intitle:${age}`));
    for (const year of years) pass1.push(scoped(`intitle:"${name}" "${year}"`));
    pass1.push(scoped(`intitle:"${name}"`));
  }

  // --- Pass 2: full-text search over the strongest hints
  const key = [...names.slice(0, 2), ...ages.slice(0, 1), ...years.slice(0, 1), ...words.slice(0, 3)]
    .map((term) => `"${term}"`)
    .join(' ');
  const pass2 = key ? [scoped(key)] : [];

  // --- Pass 3: prefix match on titles
  const pass3 = [];
  if (names.length) pass3.push(names[0]);
  else if (words.length) pass3.push(words[0]);

  // Run the passes and merge their results.
  const seen = new Set();
  const out = [];
  const collect = (hit) => {
    if (seen.has(hit.title)) return;
    seen.add(hit.title);
    out.push(hit);
  };

  async function runSearch(query) {
    const r = await api.get({
      action: 'query',
      list: 'search',
      srsearch: query,
      srnamespace: NS_MAIN,
      srlimit: MAX,
      formatversion: 2,
    });
    (r.query?.search || []).forEach(collect);
  }

  for (const query of [...pass1, ...pass2]) {
    await runSearch(query);
    if (out.length >= MAX) return out.slice(0, MAX);
  }

  for (const prefix of pass3) {
    const r = await api.get({
      action: 'query',
      list: 'prefixsearch',
      pssearch: prefix,
      psnamespace: NS_MAIN,
      pslimit: MAX,
    });
    (r.query?.prefixsearch || []).forEach((it) => {
      // Without formatversion 2 the title may be delivered under the '*' key.
      collect({ title: it.title || it['*'], snippet: '' });
    });
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
/* global mw, Tesseract */
(function(){
'use strict';
// The gadget only activates when the scan block is present, i.e. when both
// the run button and the file input exist in the document.
function hasUI() {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
// --- UI helpers ---
// Writes the status text into #ados-scan-status; a falsy text clears it.
// Does nothing when the element is missing.
function setStatus(t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) return;
  statusEl.textContent = t || '';
}
// Updates the #ados-scan-progress bar. null/undefined hides the bar and
// resets it to 0; any other value shows the bar with the value clamped to
// the range 0..1. Does nothing when the element is missing.
function setProgress(p) {
  const bar = document.getElementById('ados-scan-progress');
  if (!bar) return;

  if (p === null || p === undefined) {
    bar.style.display = 'none';
    bar.value = 0;
    return;
  }

  bar.style.display = '';
  bar.value = Math.min(1, Math.max(0, p));
}
/**
 * Shows a preview of the selected image inside #ados-scan-preview, replacing
 * any previous preview. Does nothing when the container is missing.
 *
 * @param {Blob} file - Image file to preview.
 */
function showPreview(file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) return; // no container: do not allocate an object URL at all

  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  img.alt = 'Vorschau';
  // The blob URL is only needed until the image has loaded (or failed);
  // releasing it afterwards avoids leaking one URL per selected photo.
  const release = () => { URL.revokeObjectURL(url); };
  img.onload = release;
  img.onerror = release;
  img.src = url;

  prev.textContent = ''; // drop the previous preview
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
// --- Load Tesseract only on demand ---
// Promise of the load currently in flight or already finished; cleared again
// when loading fails so that a later call can retry.
let tesseractReady;

/**
 * Makes sure the Tesseract script is loaded. Resolves immediately when
 * window.Tesseract already exists; otherwise tries the jsDelivr URL first and
 * the unpkg URL as fallback. Concurrent and later calls share one promise.
 *
 * @returns {Promise<void>} Resolves once a script has loaded.
 * @throws {Error} Rejects when neither script URL could be loaded.
 */
function ensureTesseract() {
  if (tesseractReady) return tesseractReady;

  const sources = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js',
  ];

  const loadScript = (src) => new Promise((resolve, reject) => {
    const script = document.createElement('script');
    script.src = src;
    script.async = true;
    script.onload = () => resolve();
    script.onerror = () => reject(new Error('Script load failed: ' + src));
    document.head.appendChild(script);
  });

  const attempt = (async () => {
    if (window.Tesseract) return;
    let lastError;
    for (const src of sources) {
      try {
        await loadScript(src);
        return;
      } catch (err) {
        lastError = err; // fall through to the next source
      }
    }
    throw new Error('Tesseract konnte nicht geladen werden', { cause: lastError });
  })();

  // Do not keep a rejected promise cached: otherwise every retry after a
  // temporary network problem would fail without loading anything again.
  tesseractReady = attempt.catch((err) => {
    tesseractReady = undefined;
    throw err;
  });
  return tesseractReady;
}
// --- OCR + heuristics ---
/**
 * Runs OCR on the given image with the German + English models and mirrors
 * the recognition progress into the progress bar.
 *
 * @param {Blob} file - Image passed to Tesseract.recognize as-is.
 * @returns {Promise<string>} Recognised text, or '' when the result has none.
 */
async function runOCR(file) {
  await ensureTesseract();
  setProgress(0);
  try {
    const res = await Tesseract.recognize(file, 'deu+eng', {
      logger(m) {
        // Only the "recognizing text" phase drives the progress bar.
        if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
          setProgress(m.progress);
        }
      },
    });
    return (res && res.data && res.data.text) || '';
  } finally {
    // Hide the progress bar on failure as well, not only on success.
    setProgress(null);
  }
}
/**
 * Derives search hints from OCR text.
 *
 * Besides words / ages / years the result also carries `names` and `raw`,
 * because searchWikiSmart (called from the click handler) reads `hints.names`.
 *
 * @param {string} text - Raw OCR output (null/undefined is treated as '').
 * @returns {{names: string[], words: string[], ages: string[], years: string[], raw: string}}
 *   names: KNOWN_TOKENS entries found in the text (empty when that list is
 *          not defined);
 *   words: up to 6 distinct capitalised words with at least 4 characters;
 *   ages: distinct ages from "12 years", "12 yo", "12-year-old", "14 Jahre";
 *   years: distinct four-digit years starting with 19 or 20;
 *   raw: the text with whitespace runs collapsed to single spaces.
 */
function extractHints(text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();
  const distinct = (values) => [...new Set(values)];

  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const words = distinct(raw.match(wordRe) || []).slice(0, 6);

  // The separator may be a space or a hyphen so that "12-year-old" matches.
  const ageRe = /\b([1-9]\d?)[\s-]?(?:years?|yo|jahr|jahre)\b/gi;
  const ages = distinct([...raw.matchAll(ageRe)].map((m) => m[1]));

  const years = distinct(raw.match(/\b(?:19|20)\d{2}\b/g) || []);

  // Known distillery / brand tokens that occur as whole words (any case).
  const tokens = typeof KNOWN_TOKENS === 'undefined' ? [] : KNOWN_TOKENS;
  const names = tokens.filter((token) => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
  });

  return { names, words, ages, years, raw };
}
// Builds a full-text query from the hints: every word, age and year (in that
// order) becomes a double-quoted phrase, joined by spaces. Without any hint
// the query is the single term "Whisky".
function buildSearchQuery(h) {
  const phrases = [];
  for (const field of ['words', 'ages', 'years']) {
    (h[field] || []).forEach((value) => {
      phrases.push(`"${value}"`);
    });
  }
  return phrases.length > 0 ? phrases.join(' ') : 'Whisky';
}
// --- Wiki search ---
// Runs a plain full-text search (srwhat=text) through the MediaWiki API and
// returns the list of hits, or an empty array when the response has none.
// `limit` defaults to 12.
async function searchWiki(query, limit) {
  await mw.loader.using('mediawiki.api');
  const client = new mw.Api();
  const params = {
    action: 'query',
    list: 'search',
    srsearch: query,
    srlimit: limit || 12,
    srwhat: 'text',
    formatversion: 2,
  };
  const response = await client.get(params);
  return response.query?.search || [];
}
// HTML-escapes a value via mw.html.escape; falsy values become the empty string.
function esc(s) {
  const text = s ? String(s) : '';
  return mw.html.escape(text);
}
/**
 * Renders up to 12 search hits into #ados-scan-results, or a "no clear hits"
 * message when the list is empty. Does nothing when the container is missing.
 *
 * All markup is built from DOM nodes with textContent, so neither titles nor
 * API snippets are ever interpreted as HTML of this page.
 *
 * @param {Array<{title?: string, snippet?: string}>} items - Search hits.
 */
function renderResults(items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = '';

  if (!items || !items.length) {
    const empty = document.createElement('div');
    empty.className = 'ados-hit';
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }

  // Snippets arrive as HTML (highlight spans, entities). An inert DOMParser
  // document strips the tags and decodes the entities into plain text.
  const parser = new DOMParser();
  const snippetText = (html) =>
    parser.parseFromString(String(html || ''), 'text/html').body.textContent || '';

  items.slice(0, 12).forEach((it) => {
    const title = it.title || '';

    const hit = document.createElement('div');
    hit.className = 'ados-hit';

    const strong = document.createElement('b');
    const anchor = document.createElement('a');
    anchor.href = mw.util.getUrl(title.replace(/ /g, '_'));
    anchor.textContent = title;
    strong.appendChild(anchor);
    hit.appendChild(strong);

    const snip = snippetText(it.snippet);
    if (snip) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snip;
      hit.appendChild(meta);
    }

    box.appendChild(hit);
  });
}
// --- Binding ---
// Set once the event handlers have been attached; later bind() calls are no-ops.
let BOUND = false;

/**
 * Attaches the gadget's event handlers to the scan UI, at most once.
 * Returns without doing anything while the UI is not present yet.
 *
 * Handlers: the optional big button opens the file picker, choosing a file
 * shows a preview, and the run button performs OCR -> hint extraction ->
 * wiki search -> result rendering, reporting progress via setStatus.
 */
function bind() {
  if (BOUND || !hasUI()) return;
  const runBtn = document.getElementById('ados-scan-run');
  const fileIn = document.getElementById('ados-scan-file');
  const bigBtn = document.getElementById('ados-scan-bigbtn');
  if (!runBtn || !fileIn) return;
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  if (bigBtn) {
    bigBtn.addEventListener('click', () => { fileIn.click(); });
  }

  fileIn.addEventListener('change', () => {
    if (fileIn.files && fileIn.files[0]) showPreview(fileIn.files[0]);
  });

  runBtn.addEventListener('click', async (ev) => {
    ev.preventDefault();
    const file = fileIn.files && fileIn.files[0];
    if (!file) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(file);

      setStatus('Suche im Wiki …');
      const extracted = extractHints(text) || {};
      // searchWikiSmart reads all four lists; supply empty ones for any list
      // the extractor did not return.
      const hints = {
        names: extracted.names || [],
        ages: extracted.ages || [],
        years: extracted.years || [],
        words: extracted.words || [],
      };
      const hits = await searchWikiSmart(hints, 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = 'Erkennen & suchen';
    }
  });

  // Safeguard against overlays covering the button.
  const wrap = document.getElementById('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  runBtn.style.position = 'relative';
  runBtn.style.zIndex = '9999';
  runBtn.style.pointerEvents = 'auto';
}
// Initial binding plus fallbacks: once the DOM is ready, two delayed retries,
// and a MutationObserver for UI that is inserted later (also mobile / VE).
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
setTimeout(bind, 250);
setTimeout(bind, 1000);

const mo = new MutationObserver(() => {
  if (!BOUND) bind();
  // Once bound there is nothing left to watch for; stop observing so the
  // callback no longer runs on every DOM mutation of the page.
  if (BOUND) mo.disconnect();
});
mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
})();