MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 3: | Zeile 3: | ||
'use strict'; | 'use strict'; | ||
// | // ======================================================================== | ||
// | // KONFIG | ||
// | // ======================================================================== | ||
// In welchen Kategorien wird gesucht? | |||
// In | |||
const ADOS_CATEGORIES = [ | const ADOS_CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | 'Alle A Dream of Scotland Abfüllungen', | ||
| Zeile 20: | Zeile 17: | ||
]; | ]; | ||
// Distillery-/ | // Häufige Distillery-/Serien-Tokens (zum „Einhaken“ in die Suche) | ||
const KNOWN_TOKENS = [ | const KNOWN_TOKENS = [ | ||
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | 'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | ||
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | 'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | ||
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | 'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | ||
'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland' | 'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland', | ||
'A Dream of Scotland','A Dream of Ireland','The Fine Art of Whisky', | |||
'The Tasteful 8','Friendly Mr. Z','Die Whisky Elfen','Rumbastic' | |||
]; | ]; | ||
// - | // Debug: Roh-OCR unten anzeigen, wenn true | ||
// | window.ADOS_SCAN_DEBUG = window.ADOS_SCAN_DEBUG || false; | ||
// - | |||
function hasUI () { | // ======================================================================== | ||
return !! | // DOM-Helfer | ||
// ======================================================================== | |||
function byId(id){ return document.getElementById(id); } | |||
function hasUI(){ | |||
return !!byId('ados-scan-run') && !!byId('ados-scan-file'); | |||
} | } | ||
function setProgress (p) { | function setStatus(t){ const el = byId('ados-scan-status'); if (el) el.textContent = t || ''; } | ||
function setProgress(p){ | |||
const bar = byId('ados-scan-progress'); if (!bar) return; | |||
if (p == null) { bar.hidden = true; bar.value = 0; } | if (p == null){ bar.hidden = true; bar.value = 0; } | ||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | ||
} | } | ||
function showPreview (file) { | function showPreview(file){ | ||
const url = URL.createObjectURL(file); | |||
const prev = byId('ados-scan-preview'); | |||
if (prev) { | if (prev){ prev.innerHTML = '<img alt="Vorschau" src="'+url+'">'; prev.setAttribute('aria-hidden','false'); } | ||
} | } | ||
// | // ======================================================================== | ||
// | // TESSERACT sauber als WORKER laden (deu+eng) | ||
// | // ======================================================================== | ||
let _ocrWorkerPromise = null; | |||
function | function getOcrWorker(){ | ||
if ( | if (_ocrWorkerPromise) return _ocrWorkerPromise; | ||
_ocrWorkerPromise = (async () => { | |||
if (window.Tesseract) | if (!window.Tesseract){ | ||
await new Promise((res, rej) => { | |||
const s=document.createElement('script'); | |||
s.src='https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js'; | |||
s.async=true; s.onload=res; s.onerror=() => { | |||
const s2=document.createElement('script'); | |||
s2.src='https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'; | |||
s2.async=true; s2.onload=res; s2.onerror=rej; document.head.appendChild(s2); | |||
}; | |||
document.head.appendChild(s); | |||
}); | |||
} | |||
const { createWorker } = Tesseract; | |||
const worker = await createWorker({ | |||
}); | workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js', | ||
return | corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js', | ||
langPath: 'https://tessdata.projectnaptha.com/5', | |||
logger: m => { | |||
if (m && m.status === 'recognizing text' && typeof m.progress === 'number'){ | |||
setProgress(0.05 + m.progress * 0.9); | |||
} | |||
} | |||
}); | |||
await worker.loadLanguage('deu+eng'); | |||
await worker.initialize('deu+eng'); | |||
await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' }); | |||
return worker; | |||
})(); | |||
return _ocrWorkerPromise; | |||
} | } | ||
// | // ======================================================================== | ||
// | // Bild-Vorverarbeitung: Skalierung + adaptives Thresholding | ||
// ======================================================================== | |||
function scaleToCanvas(img, maxSide = 2000){ | |||
// | const s = Math.min(1, maxSide / Math.max(img.width, img.height)); | ||
const w = Math.round(img.width * s), h = Math.round(img.height * s); | |||
function | const c = document.createElement('canvas'); c.width=w; c.height=h; | ||
const | const ctx=c.getContext('2d'); ctx.imageSmoothingEnabled = true; | ||
const w = Math.round(img.width * | ctx.drawImage(img,0,0,w,h); | ||
const c = document.createElement('canvas'); | |||
const ctx = c.getContext('2d'); | |||
ctx.drawImage(img, 0, 0, w, h); | |||
return c; | return c; | ||
} | } | ||
const gray = new Uint8ClampedArray(w*h); | function adaptiveThreshold(src){ | ||
for (let i=0,j=0;i<d.length;i+=4,++j) { | const w=src.width, h=src.height; | ||
gray[j] = (0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2])|0; | const out=document.createElement('canvas'); out.width=w; out.height=h; | ||
const sctx=src.getContext('2d'), octx=out.getContext('2d'); | |||
const id=sctx.getImageData(0,0,w,h), d=id.data; | |||
const gray=new Uint8ClampedArray(w*h); | |||
for(let i=0,j=0;i<d.length;i+=4,++j){ | |||
gray[j]=(0.2126*d[i]+0.7152*d[i+1]+0.0722*d[i+2])|0; | |||
} | } | ||
const S = new Uint32Array((w+1)*(h+1)); | const S=new Uint32Array((w+1)*(h+1)); | ||
for (let y=1;y<=h;y++) { | for(let y=1;y<=h;y++){ | ||
let | let row=0; | ||
for (let x=1;x<=w;x++) { | for(let x=1;x<=w;x++){ | ||
const v = gray[(y-1)*w + (x-1)]; | const v=gray[(y-1)*w + (x-1)]; | ||
row+=v; S[y*(w+1)+x]=S[(y-1)*(w+1)+x]+row; | |||
} | } | ||
} | } | ||
const win = Math.max(15, Math.round(Math.min(w,h)/24)); | const win=Math.max(15, Math.round(Math.min(w,h)/24)); | ||
const | const outId=octx.createImageData(w,h), od=outId.data; | ||
const C = 7; | const C=7; | ||
for(let y=0;y<h;y++){ | |||
for (let y=0;y<h;y++) { | const y0=Math.max(0,y-win), y1=Math.min(h-1,y+win); | ||
const y0 = Math.max(0, y - win), y1 = Math.min(h-1, y + win); | for(let x=0;x<w;x++){ | ||
for (let x=0;x<w;x++) { | const x0=Math.max(0,x-win), x1=Math.min(w-1,x+win); | ||
const x0 = Math.max(0, x - win), x1 = Math.min(w-1, x + win); | const A=S[y0*(w+1)+x0], B=S[(y1+1)*(w+1)+x0], Cc=S[y0*(w+1)+(x1+1)], Dd=S[(y1+1)*(w+1)+(x1+1)]; | ||
const A = S[y0*(w+1)+x0] | const area=(x1-x0+1)*(y1-y0+1); | ||
const mean=(Dd + A - B - Cc)/area; | |||
const g=gray[y*w+x]; | |||
const area = (x1-x0+1)*(y1-y0+1); | |||
const mean = | |||
const g = gray[y*w + x]; | |||
const pix = g < (mean - C) ? 0 : 255; | const pix = g < (mean - C) ? 0 : 255; | ||
const k = (y*w + x)*4; | const k=(y*w+x)*4; od[k]=od[k+1]=od[k+2]=pix; od[k+3]=255; | ||
} | } | ||
} | } | ||
octx.putImageData( | octx.putImageData(outId,0,0); | ||
return out; | return out; | ||
} | } | ||
async function | |||
// ======================================================================== | |||
// OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig) | |||
// ======================================================================== | |||
async function runOCR(file){ | |||
setProgress(0.02); | |||
const img = await new Promise((res, rej) => { | const img = await new Promise((res, rej) => { | ||
const o = new Image(); | const o=new Image(); o.onload=()=>res(o); o.onerror=rej; | ||
o.src=URL.createObjectURL(file); | |||
o.src = URL.createObjectURL(file); | |||
}); | }); | ||
const base = | |||
const base = scaleToCanvas(img, 2000); | |||
const bin = adaptiveThreshold(base); | const bin = adaptiveThreshold(base); | ||
const | const worker = await getOcrWorker(); | ||
{ | const candidates = [ | ||
{ | { canvas: bin, psm: '11' }, | ||
{ | { canvas: base, psm: '11' }, | ||
{ canvas: bin, psm: '6' }, | |||
{ canvas: base, psm: '6' }, | |||
{ canvas: bin, psm: '4' }, | |||
{ canvas: base, psm: '4' } | |||
]; | ]; | ||
let best = ''; | |||
let | for (let i=0;i<candidates.length;i++){ | ||
const c = candidates[i]; | |||
await worker.setParameters({ tessedit_pageseg_mode: c.psm }); | |||
const | const { data } = await worker.recognize(c.canvas); | ||
const txt = (data && data.text ? data.text : '').trim(); | |||
if (txt.length > best.length) best = txt; | |||
if (best.length > 40) break; | |||
setProgress(0.96 + i * 0.008); | |||
} | } | ||
setProgress(null); | setProgress(null); | ||
if (window.ADOS_SCAN_DEBUG){ | |||
const box = byId('ados-scan-ocr'); | |||
if (box) box.textContent = best || '(leer)'; | |||
} | |||
return best; | |||
} | |||
return | |||
} | } | ||
// | // ======================================================================== | ||
// | // Hinweise aus OCR destillieren | ||
// | // ======================================================================== | ||
function extractHints (text) { | function extractHints(text){ | ||
const raw = String(text || '').replace(/\s+/g, ' ').trim( | const raw = String(text||'').replace(/\s+/g,' ').trim(); | ||
const names = []; | |||
const | KNOWN_TOKENS.forEach(t=>{ | ||
KNOWN_TOKENS.forEach(t => { | const re = new RegExp('\\b'+t.replace(/[.*+?^${}()|[\]\\]/g,'\\$&')+'\\b','i'); | ||
const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i'); | if (re.test(raw)) names.push(t); | ||
if (re.test( | |||
}); | }); | ||
const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | ||
while ((m = ageRe.exec( | const ages=[]; let m; while((m=ageRe.exec(raw))!==null){ if(!ages.includes(m[1])) ages.push(m[1]); } | ||
const yearRe = /\b(19|20)\d{2}\b/g; | const yearRe = /\b(19|20)\d{2}\b/g; | ||
while ((m = yearRe.exec( | const years=[]; while((m=yearRe.exec(raw))!==null){ if(!years.includes(m[0])) years.push(m[0]); } | ||
const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | ||
const uniq = new Set() | const uniq=new Set(); const words=[]; let w; | ||
while ((w = wordRe.exec( | while((w=wordRe.exec(raw))!==null){ const s=w[0]; if(!uniq.has(s)){ uniq.add(s); words.push(s); if(words.length>=8) break; } } | ||
return { names | return { names, ages, years, words, raw }; | ||
} | } | ||
// | // ======================================================================== | ||
// | // Suche im Wiki (3 Pässe) | ||
// | // ======================================================================== | ||
async function searchWikiSmart (hints, limit) { | async function searchWikiSmart(hints, limit){ | ||
await mw.loader.using('mediawiki.api'); | await mw.loader.using('mediawiki.api'); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const MAX = limit || 12, ns0=0; | |||
const MAX = limit || 12; | |||
function | function incats(){ | ||
return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' '); | return ADOS_CATEGORIES.map(c => 'incategory:"'+c+'"').join(' '); | ||
} | } | ||
const pass1 = []; | const pass1=[]; | ||
if (hints.names.length) { | if (hints.names.length){ | ||
hints.names.forEach(n => { | hints.names.forEach(n=>{ | ||
if (hints.ages.length) hints.ages.forEach(a => pass1.push(`intitle:"${n}" intitle:${a} ${ | if (hints.ages.length) hints.ages.forEach(a=> pass1.push(`intitle:"${n}" intitle:${a} ${incats()}`)); | ||
if (hints.years.length) hints.years.forEach(y => pass1.push(`intitle:"${n}" "${y}" ${ | if (hints.years.length) hints.years.forEach(y=> pass1.push(`intitle:"${n}" "${y}" ${incats()}`)); | ||
pass1.push(`intitle:"${n}" ${ | pass1.push(`intitle:"${n}" ${incats()}`); | ||
}); | }); | ||
} | } | ||
const key = [] | const key=[].concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3)) | ||
.map(x=>`"${x}"`).join(' '); | |||
const pass2 = key ? [ `${key} ${incats()}` ] : []; | |||
const pass2 = key ? [ `${key} ${ | |||
const pass3 = []; | const pass3=[]; | ||
if (hints.names.length) pass3.push(hints.names[0]); | if (hints.names.length) pass3.push(hints.names[0]); | ||
if (!pass3.length && hints.words.length) pass3.push(hints.words[0]); | if (!pass3.length && hints.words.length) pass3.push(hints.words[0]); | ||
const seen = new Set() | const seen=new Set(), out=[]; | ||
async function runSr (q) { | async function runSr(q){ | ||
const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 }); | const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 }); | ||
(r.query?.search || []).forEach(it => { | (r.query?.search || []).forEach(it=>{ | ||
const k = it.title; | const k=it.title; if (seen.has(k)) return; seen.add(k); out.push(it); | ||
}); | }); | ||
} | } | ||
for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | for (const q of pass1){ await runSr(q); if (out.length>=MAX) return out.slice(0,MAX); } | ||
for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | for (const q of pass2){ await runSr(q); if (out.length>=MAX) return out.slice(0,MAX); } | ||
for (const p of pass3) { | for (const p of pass3){ | ||
const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace:ns0, pslimit:MAX }); | ||
(r.query?.prefixsearch || []).forEach(it => { | (r.query?.prefixsearch || []).forEach(it=>{ | ||
const title = it.title || it['*']; | const title = it.title || it['*']; const k=title; | ||
if (seen.has(k)) return; seen.add(k); out.push({ title, snippet:'' }); | |||
if (seen.has(k)) return; | |||
}); | }); | ||
if (out.length >= MAX) break; | if (out.length>=MAX) break; | ||
} | } | ||
return out.slice(0, MAX); | return out.slice(0,MAX); | ||
} | } | ||
// | // ======================================================================== | ||
// | // Treffer rendern | ||
// | // ======================================================================== | ||
function renderResults (items) { | function esc(s){ return mw.html.escape(String(s||'')); } | ||
function renderResults(items){ | |||
const box = byId('ados-scan-results'); if (!box) return; | |||
box.innerHTML = ''; | box.innerHTML=''; | ||
if (!items || !items.length) { | if (!items || !items.length){ | ||
box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>'; | box.innerHTML='<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>'; | ||
return; | return; | ||
} | } | ||
items.slice(0, 12).forEach | items.slice(0,12).forEach(it=>{ | ||
const title = it.title || ''; | |||
const link = mw.util.getUrl(title.replace(/ /g,'_')); | |||
const snip = String(it.snippet||'').replace(/<\/?span[^>]*>/g,'').replace(/"/g,'"'); | |||
const div=document.createElement('div'); div.className='ados-hit'; | |||
div.innerHTML = '<b><a href="'+link+'">'+esc(title)+'</a></b>' + (snip?'<div class="meta">'+snip+'</div>':''); | |||
div.innerHTML = | |||
box.appendChild(div); | box.appendChild(div); | ||
}); | }); | ||
} | } | ||
// | // ======================================================================== | ||
// | // BINDING | ||
// | // ======================================================================== | ||
let BOUND=false; | |||
function bind () { | function bind(){ | ||
if (BOUND || !hasUI()) return; | if (BOUND || !hasUI()) return; | ||
const runBtn = byId('ados-scan-run'); | |||
const fileIn = byId('ados-scan-file'); | |||
const bigBtn = byId('ados-scan-bigbtn'); | |||
const drop = byId('ados-scan-drop'); | |||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
if (runBtn.dataset.bound === '1') return; | if (runBtn.dataset.bound === '1') return; | ||
runBtn.dataset.bound = '1'; BOUND = true; | runBtn.dataset.bound='1'; BOUND=true; | ||
if (bigBtn) bigBtn.addEventListener('click', | if (bigBtn) bigBtn.addEventListener('click', () => fileIn.click()); | ||
fileIn.addEventListener('change', function () { | fileIn.addEventListener('change', function(){ if (this.files && this.files[0]) showPreview(this.files[0]); }); | ||
// Drag&Drop | // Drag&Drop | ||
if (drop) { | if (drop){ | ||
['dragenter','dragover'].forEach(ev => | ['dragenter','dragover'].forEach(ev=> drop.addEventListener(ev, e=>{ e.preventDefault(); drop.classList.add('is-drag'); })); | ||
['dragleave','drop'].forEach(ev=> drop.addEventListener(ev, e=>{ e.preventDefault(); drop.classList.remove('is-drag'); })); | |||
['dragleave','drop'].forEach(ev => | drop.addEventListener('drop', e=>{ | ||
const f = e.dataTransfer && e.dataTransfer.files && e.dataTransfer.files[0]; | |||
drop.addEventListener('drop', e => { | if (f){ fileIn.files = e.dataTransfer.files; showPreview(f); } | ||
const f = e.dataTransfer | |||
if (f) { fileIn.files = e.dataTransfer.files; showPreview(f); } | |||
}); | }); | ||
} | } | ||
runBtn.addEventListener('click', async function (ev) { | // Klick „Erkennen & suchen“ | ||
runBtn.addEventListener('click', async function(ev){ | |||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])){ alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
const f = fileIn.files[0]; | |||
try { | try{ | ||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | runBtn.disabled=true; runBtn.textContent='Erkenne …'; | ||
setStatus('Erkenne Label …'); | setStatus('Erkenne Label …'); | ||
const text = await runOCR(f); | |||
setStatus('Suche im Wiki …'); | setStatus('Suche im Wiki …'); | ||
const hints = extractHints(text); | |||
const hits = await searchWikiSmart(hints, 12); | |||
renderResults(hits); | renderResults(hits); | ||
setStatus('Fertig.'); | setStatus('Fertig.'); | ||
} catch (e) { | } catch(e){ | ||
console.error('[LabelScan]', e); | console.error('[LabelScan]', e); | ||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | ||
} finally { | } finally { | ||
runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen'; | runBtn.disabled=false; runBtn.textContent='🔍 Erkennen & suchen'; | ||
} | } | ||
}); | }); | ||
// Sicherheit gegen Overlays | // Sicherheit gegen Overlays | ||
const wrap = byId('ados-labelscan'); if (wrap) wrap.style.position='relative'; | |||
runBtn.style.position='relative'; runBtn.style.zIndex='9999'; runBtn.style.pointerEvents='auto'; | |||
runBtn.style.position = 'relative'; | |||
} | } | ||
// | // Erstbindung + Fallbacks + Observer | ||
if (document.readyState === 'loading') { | if (document.readyState === 'loading'){ document.addEventListener('DOMContentLoaded', bind); } else { bind(); } | ||
setTimeout(bind, 250); setTimeout(bind, 1000); | |||
const mo = new MutationObserver(() => { if (!BOUND) bind(); }); | |||
mo.observe(document.documentElement || document.body, { childList:true, subtree:true }); | |||
setTimeout(bind, 250); | |||
mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | |||
})(); | })(); | ||
Version vom 6. November 2025, 00:57 Uhr
/* global mw, Tesseract */
(function () {
'use strict';
// ========================================================================
// KONFIG
// ========================================================================
// Wiki categories used to build the incategory: filter for the full-text search passes.
const ADOS_CATEGORIES = [
  "Alle A Dream of Scotland Abfüllungen",
  "Alle A Dream of Ireland Abfüllungen",
  "Alle A Dream of... – Der Rest der Welt Abfüllungen",
  "Friendly Mr. Z Whiskytainment Abfüllungen",
  "Die Whisky Elfen Abfüllungen",
  "The Fine Art of Whisky Abfüllungen",
  "Alle Rumbastic Abfüllungen",
];
// Frequent distillery / series tokens. Any token found in the OCR text
// (whole-word, case-insensitive) becomes a "name" hint that anchors the search.
const KNOWN_TOKENS = [
  // distillery names
  "Ardbeg", "Ardmore", "Arran", "Auchroisk", "Ben Nevis", "Blair Athol", "Bowmore",
  "Caol Ila", "Clynelish", "Glenallachie", "Glenrothes", "Longmorn", "Lagavulin",
  "Tullibardine", "Dalmore", "Benrinnes", "Mortlach", "Glenlivet", "Inchgower",
  // region / origin names
  "Islay", "Speyside", "Highland", "Lowland", "Campbeltown", "Ireland",
  // series names
  "A Dream of Scotland", "A Dream of Ireland", "The Fine Art of Whisky",
  "The Tasteful 8", "Friendly Mr. Z", "Die Whisky Elfen", "Rumbastic",
];
// Debug switch: when truthy, runOCR writes the raw OCR text into the element
// with id "ados-scan-ocr". A truthy value set before this script runs is kept;
// otherwise the flag is initialised to false.
window.ADOS_SCAN_DEBUG = window.ADOS_SCAN_DEBUG || false;
// ========================================================================
// DOM-Helfer
// ========================================================================
/**
 * Shorthand for document.getElementById.
 * @param {string} id - element id to look up
 * @returns {HTMLElement|null} the element, or null when no such id exists
 */
function byId(id){
  const found = document.getElementById(id);
  return found;
}
/**
 * Reports whether the scanner UI is present in the document.
 * Both the run button and the file input must exist.
 * @returns {boolean}
 */
function hasUI(){
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  // every() stops at the first missing element, like the && chain it replaces.
  return requiredIds.every((id) => Boolean(byId(id)));
}
/**
 * Writes a status message into #ados-scan-status (no-op when the element is missing).
 * @param {string} [t] - message text; any falsy value clears the status line
 */
function setStatus(t){
  const target = byId('ados-scan-status');
  if (!target) return;
  target.textContent = t ? t : '';
}
/**
 * Updates the #ados-scan-progress element (no-op when it is missing).
 * @param {number|null|undefined} p - fraction to show, clamped to [0, 1];
 *   null/undefined hides the bar and resets its value to 0
 */
function setProgress(p){
  const meter = byId('ados-scan-progress');
  if (!meter) return;
  const hide = (p === null || p === undefined);
  meter.hidden = hide;
  meter.value = hide ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview image of the chosen file inside #ados-scan-preview.
 *
 * The blob URL is created only when the preview container exists and is
 * revoked as soon as the image has loaded (or failed to load), so repeated
 * selections do not accumulate unreleased object URLs. The <img> is built as a
 * DOM node instead of an HTML string.
 *
 * @param {Blob} file - image file picked or dropped by the user
 */
function showPreview(file){
  const prev = byId('ados-scan-preview');
  if (!prev) return; // nothing to render into: do not allocate a blob URL at all

  const url = URL.createObjectURL(file);
  const release = () => URL.revokeObjectURL(url);

  const img = document.createElement('img');
  img.alt = 'Vorschau';
  // An already decoded image stays visible after its object URL is revoked.
  img.onload = release;
  img.onerror = release;
  img.src = url;

  prev.innerHTML = '';
  prev.appendChild(img);
  prev.setAttribute('aria-hidden','false');
}
// ========================================================================
// TESSERACT sauber als WORKER laden (deu+eng)
// ========================================================================
// Cached promise for the shared Tesseract worker. It is null until the first
// request and is reset to null when initialisation fails, so a later call retries.
let _ocrWorkerPromise = null;

/**
 * Injects an async <script> tag into <head>.
 * @param {string} src - script URL
 * @returns {Promise<void>} resolves when the script has loaded, rejects with an Error otherwise
 */
function loadScriptTag(src){
  return new Promise((resolve, reject) => {
    const tag = document.createElement('script');
    tag.src = src;
    tag.async = true;
    tag.onload = () => resolve();
    tag.onerror = () => reject(new Error('Failed to load script: ' + src));
    document.head.appendChild(tag);
  });
}

/**
 * Returns the shared Tesseract.js worker (languages deu+eng), creating it on first use.
 *
 * If window.Tesseract is not defined yet, the library is loaded from jsDelivr,
 * falling back to unpkg when that fails. The worker is created with the
 * tesseract.js v5 signature createWorker(langs, oem, options); the separate
 * loadLanguage()/initialize() calls of older versions are not used.
 * NOTE(review): assumes window.Tesseract, if already present, is also v5 - confirm.
 *
 * A failed initialisation is not kept in the cache: the promise returned for
 * that attempt rejects, and the next call starts a fresh attempt.
 *
 * @returns {Promise<object>} promise for the initialised worker
 */
function getOcrWorker(){
  if (_ocrWorkerPromise) return _ocrWorkerPromise;

  const pending = (async () => {
    if (!window.Tesseract){
      try {
        await loadScriptTag('https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js');
      } catch (primaryError) {
        // Primary CDN unreachable: try the mirror; its failure rejects the whole attempt.
        await loadScriptTag('https://unpkg.com/tesseract.js@5/dist/tesseract.min.js');
      }
    }
    const { createWorker } = window.Tesseract;
    // Second argument is the OCR engine mode; 1 selects the LSTM engine.
    const worker = await createWorker('deu+eng', 1, {
      workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
      corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core.wasm.js',
      // NOTE(review): verify that this language-data path is actually served.
      langPath: 'https://tessdata.projectnaptha.com/5',
      logger: m => {
        // Map recognition progress onto the 5%..95% range of the progress bar.
        if (m && m.status === 'recognizing text' && typeof m.progress === 'number'){
          setProgress(0.05 + m.progress * 0.9);
        }
      }
    });
    await worker.setParameters({ tessedit_pageseg_mode: '11', user_defined_dpi: '300' });
    return worker;
  })();

  _ocrWorkerPromise = pending;
  // Drop a failed attempt from the cache so the user's retry can succeed.
  pending.catch(() => {
    if (_ocrWorkerPromise === pending) _ocrWorkerPromise = null;
  });
  return pending;
}
// ========================================================================
// Bild-Vorverarbeitung: Skalierung + adaptives Thresholding
// ========================================================================
/**
 * Draws an image onto a new canvas, shrinking it so that its longer side is at
 * most `maxSide` pixels. Images are never enlarged.
 *
 * Both canvas dimensions are clamped to at least 1 px: for extreme aspect
 * ratios the shorter side would otherwise round to 0, and reading pixels from
 * a zero-sized canvas fails.
 *
 * @param {{width:number, height:number}} img - drawable image source
 * @param {number} [maxSide=2000] - upper bound for the longer side
 * @returns {HTMLCanvasElement} canvas containing the (possibly downscaled) image
 */
function scaleToCanvas(img, maxSide = 2000){
  const longest = Math.max(img.width, img.height);
  const s = longest > 0 ? Math.min(1, maxSide / longest) : 1;
  const w = Math.max(1, Math.round(img.width * s));
  const h = Math.max(1, Math.round(img.height * s));

  const c = document.createElement('canvas');
  c.width = w;
  c.height = h;

  const ctx = c.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);
  return c;
}
/**
 * Binarises a canvas with a local-mean threshold.
 *
 * Steps: convert every pixel to a luminance value, build a summed-area table
 * of the luminance, then set each pixel to black when it is more than a fixed
 * offset darker than the mean of its surrounding window, and to white otherwise.
 *
 * @param {HTMLCanvasElement} src - source canvas (read only)
 * @returns {HTMLCanvasElement} new canvas of the same size holding the black/white image
 */
function adaptiveThreshold(src){
  const width = src.width;
  const height = src.height;

  const out = document.createElement('canvas');
  out.width = width;
  out.height = height;

  const srcCtx = src.getContext('2d');
  const outCtx = out.getContext('2d');
  const rgba = srcCtx.getImageData(0, 0, width, height).data;

  // 1) Luminance per pixel (weighted RGB, truncated to an integer).
  const gray = new Uint8ClampedArray(width * height);
  for (let byteAt = 0, p = 0; byteAt < rgba.length; byteAt += 4, p += 1) {
    gray[p] = (0.2126*rgba[byteAt]+0.7152*rgba[byteAt+1]+0.0722*rgba[byteAt+2])|0;
  }

  // 2) Summed-area table with one extra zero row and column at the top/left.
  const stride = width + 1;
  const integral = new Uint32Array(stride * (height + 1));
  for (let row = 0; row < height; row += 1) {
    let running = 0;
    for (let col = 0; col < width; col += 1) {
      running += gray[row * width + col];
      integral[(row + 1) * stride + (col + 1)] = integral[row * stride + (col + 1)] + running;
    }
  }

  // 3) Compare each pixel against the mean of its window (clipped at the borders).
  const radius = Math.max(15, Math.round(Math.min(width, height) / 24));
  const result = outCtx.createImageData(width, height);
  const px = result.data;
  const offset = 7;

  for (let row = 0; row < height; row += 1) {
    const top = Math.max(0, row - radius);
    const bottom = Math.min(height - 1, row + radius);
    for (let col = 0; col < width; col += 1) {
      const left = Math.max(0, col - radius);
      const right = Math.min(width - 1, col + radius);

      const windowSum =
        integral[(bottom + 1) * stride + (right + 1)]
        + integral[top * stride + left]
        - integral[(bottom + 1) * stride + left]
        - integral[top * stride + (right + 1)];
      const windowArea = (right - left + 1) * (bottom - top + 1);
      const mean = windowSum / windowArea;

      const value = gray[row * width + col] < (mean - offset) ? 0 : 255;
      const o = (row * width + col) * 4;
      px[o] = value;
      px[o + 1] = value;
      px[o + 2] = value;
      px[o + 3] = 255;
    }
  }

  outCtx.putImageData(result, 0, 0);
  return out;
}
// ========================================================================
// OCR: mehrere Strategien (PSM 11 → 6 → 4, binarisiert & farbig)
// ========================================================================
/**
 * Runs OCR on an image file and returns the longest text found.
 *
 * The image is scaled and binarised, then recognised with page-segmentation
 * modes 11, 6 and 4, each on the binarised and on the scaled colour canvas.
 * The loop stops as soon as the best result exceeds 40 characters.
 *
 * The temporary blob URL is revoked once the image has loaded or failed, and
 * the progress bar is hidden in a `finally`, so it does not stay visible when
 * loading or recognition throws.
 *
 * When window.ADOS_SCAN_DEBUG is truthy the result is also written to #ados-scan-ocr.
 *
 * @param {Blob} file - image file to recognise
 * @returns {Promise<string>} trimmed OCR text ('' when nothing was recognised)
 * @throws when the image cannot be loaded or the OCR worker fails
 */
async function runOCR(file){
  setProgress(0.02);
  try {
    const url = URL.createObjectURL(file);
    let img;
    try {
      img = await new Promise((res, rej) => {
        const o = new Image();
        o.onload = () => res(o);
        o.onerror = () => rej(new Error('Image could not be loaded'));
        o.src = url;
      });
    } finally {
      // The decoded image stays usable for drawing after the URL is released.
      URL.revokeObjectURL(url);
    }

    const base = scaleToCanvas(img, 2000);
    const bin = adaptiveThreshold(base);
    const worker = await getOcrWorker();

    // For each segmentation mode try the binarised canvas first, then the colour one.
    const candidates = ['11', '6', '4'].flatMap(psm => [
      { canvas: bin, psm },
      { canvas: base, psm }
    ]);

    let best = '';
    for (let i = 0; i < candidates.length; i++){
      const c = candidates[i];
      await worker.setParameters({ tessedit_pageseg_mode: c.psm });
      const { data } = await worker.recognize(c.canvas);
      const txt = (data && data.text ? data.text : '').trim();
      if (txt.length > best.length) best = txt;
      if (best.length > 40) break; // enough text for the search hints
      setProgress(0.96 + i * 0.008);
    }

    if (window.ADOS_SCAN_DEBUG){
      const box = byId('ados-scan-ocr');
      if (box) box.textContent = best || '(leer)';
    }
    return best;
  } finally {
    setProgress(null);
  }
}
// ========================================================================
// Hinweise aus OCR destillieren
// ========================================================================
/**
 * Distils search hints from raw OCR text.
 *
 * @param {string} text - OCR output (null/undefined is treated as '')
 * @returns {{names:string[], ages:string[], years:string[], words:string[], raw:string}}
 *   names - KNOWN_TOKENS entries found as whole words (case-insensitive);
 *   ages  - distinct 1-2 digit numbers followed by years/yo/Jahr(e);
 *   years - distinct four-digit numbers starting with 19 or 20;
 *   words - up to 8 distinct capitalised words of at least 4 characters;
 *   raw   - the whitespace-normalised input
 */
function extractHints(text){
  const raw = String(text||'').replace(/\s+/g,' ').trim();

  const names = KNOWN_TOKENS.filter(token => {
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g,'\\$&');
    return new RegExp('\\b'+escaped+'\\b','i').test(raw);
  });

  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  const ages = [...new Set(Array.from(raw.matchAll(ageRe), m => m[1]))];

  const yearRe = /\b(19|20)\d{2}\b/g;
  const years = [...new Set(Array.from(raw.matchAll(yearRe), m => m[0]))];

  // \b only knows ASCII word characters, so it treats Ä/Ö/Ü/ä/ö/ü/ß as
  // separators and drops words that start or end with them. The boundaries are
  // therefore spelled out: no letter/digit/underscore directly before or after,
  // and the word must end in a letter (a trailing hyphen is not part of it).
  const wordRe = /(^|[^A-Za-zÄÖÜäöüß0-9_])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{2,}[A-Za-zÄÖÜäöüß])(?![A-Za-zÄÖÜäöüß0-9_])/g;
  const uniq = new Set();
  for (const match of raw.matchAll(wordRe)){
    uniq.add(match[2]);
    if (uniq.size >= 8) break;
  }
  const words = [...uniq];

  return { names, ages, years, words, raw };
}
// ========================================================================
// Suche im Wiki (3 Pässe)
// ========================================================================
/**
 * Searches the wiki's main namespace for pages matching the OCR hints, in three passes:
 *   1. per known name: title match combined with each age, each year, then the name alone;
 *   2. one phrase query built from up to 2 names, 1 age, 1 year and 3 words;
 *   3. a prefix search on the first name (or, without names, the first word).
 * Passes 1 and 2 are restricted to ADOS_CATEGORIES. Titles are de-duplicated
 * across passes and the search stops once `limit` results are collected.
 *
 * The category restriction is a single pipe-separated incategory: filter, which
 * CirrusSearch treats as "in any of these categories". Separate space-joined
 * incategory: terms are ANDed and would require a page to be in every category.
 * NOTE(review): assumes the wiki's search backend is CirrusSearch - confirm.
 *
 * @param {{names:string[], ages:string[], years:string[], words:string[]}} hints
 * @param {number} [limit=12] - maximum number of results
 * @returns {Promise<Array<{title:string, snippet?:string}>>} search hits, best pass first
 */
async function searchWikiSmart(hints, limit){
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const MAX = limit || 12;
  const ns0 = 0; // main (article) namespace

  const categoryFilter = ADOS_CATEGORIES.length
    ? `incategory:"${ADOS_CATEGORIES.join('|')}"`
    : '';

  // Pass 1: most specific queries first.
  const pass1 = [];
  for (const n of hints.names){
    for (const a of hints.ages) pass1.push(`intitle:"${n}" intitle:${a} ${categoryFilter}`);
    for (const y of hints.years) pass1.push(`intitle:"${n}" "${y}" ${categoryFilter}`);
    pass1.push(`intitle:"${n}" ${categoryFilter}`);
  }

  // Pass 2: one combined phrase query; a known name is usually also a word, so de-duplicate.
  const keyTerms = new Set([
    ...hints.names.slice(0,2),
    ...hints.ages.slice(0,1),
    ...hints.years.slice(0,1),
    ...hints.words.slice(0,3)
  ]);
  const key = [...keyTerms].map(x => `"${x}"`).join(' ');
  const pass2 = key ? [ `${key} ${categoryFilter}` ] : [];

  // Pass 3: prefix search term.
  const pass3 = [];
  if (hints.names.length) pass3.push(hints.names[0]);
  else if (hints.words.length) pass3.push(hints.words[0]);

  const seen = new Set();
  const out = [];

  async function runSr(q){
    const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 });
    for (const it of (r.query?.search || [])){
      if (seen.has(it.title)) continue;
      seen.add(it.title);
      out.push(it);
    }
  }

  for (const q of [...pass1, ...pass2]){
    await runSr(q);
    if (out.length >= MAX) return out.slice(0, MAX);
  }

  for (const p of pass3){
    const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace:ns0, pslimit:MAX });
    for (const it of (r.query?.prefixsearch || [])){
      const title = it.title || it['*'];
      if (seen.has(title)) continue;
      seen.add(title);
      out.push({ title, snippet:'' });
    }
    if (out.length >= MAX) break;
  }
  return out.slice(0, MAX);
}
// ========================================================================
// Treffer rendern
// ========================================================================
/**
 * HTML-escapes a value via mw.html.escape.
 * @param {*} s - value to escape; any falsy value is treated as ''
 * @returns {string} escaped text
 */
function esc(s){
  const text = String(s || '');
  return mw.html.escape(text);
}
/**
 * Renders up to 12 search hits into #ados-scan-results (no-op when the
 * container is missing). An empty or missing list shows a "no clear hits" notice.
 *
 * Each hit becomes <div class="ados-hit"><b><a href=...>title</a></b>[<div class="meta">snippet</div>]</div>.
 * Link and title are set through DOM properties rather than concatenated into
 * an HTML string, so characters in a title cannot break out of the markup.
 *
 * @param {Array<{title?:string, snippet?:string}>} items - search hits
 */
function renderResults(items){
  const box = byId('ados-scan-results');
  if (!box) return;

  box.innerHTML = '';
  if (!items || !items.length){
    box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
    return;
  }

  for (const it of items.slice(0, 12)){
    const title = it.title || '';

    const link = document.createElement('a');
    link.setAttribute('href', mw.util.getUrl(title.replace(/ /g,'_')));
    link.textContent = title;

    const heading = document.createElement('b');
    heading.appendChild(link);

    const row = document.createElement('div');
    row.className = 'ados-hit';
    row.appendChild(heading);

    // The snippet is HTML produced by the search API; only its <span> wrappers are removed.
    // NOTE(review): it is inserted as markup and relies on the API having escaped the page text.
    const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '');
    if (snip){
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.innerHTML = snip;
      row.appendChild(meta);
    }

    box.appendChild(row);
  }
}
// ========================================================================
// BINDING
// ========================================================================
// True once the event handlers have been attached; makes bind() idempotent.
let BOUND = false;

/**
 * Wires the scanner UI: file picker, preview, drag & drop and the
 * "recognise & search" button. Does nothing when the UI is not in the document
 * yet or when the handlers were already attached (module flag BOUND or
 * data-bound="1" on the run button).
 */
function bind(){
  if (BOUND) return;
  if (!hasUI()) return;

  const runBtn = byId('ados-scan-run');
  const fileIn = byId('ados-scan-file');
  const bigBtn = byId('ados-scan-bigbtn');
  const drop = byId('ados-scan-drop');

  if (!runBtn || !fileIn || runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  // The large button only forwards to the (hidden) file input.
  if (bigBtn) {
    bigBtn.addEventListener('click', () => fileIn.click());
  }

  fileIn.addEventListener('change', () => {
    const picked = fileIn.files && fileIn.files[0];
    if (picked) showPreview(picked);
  });

  // Drag & drop: highlight while dragging, accept the first dropped file.
  if (drop) {
    const markDrag = (e) => { e.preventDefault(); drop.classList.add('is-drag'); };
    const clearDrag = (e) => { e.preventDefault(); drop.classList.remove('is-drag'); };
    drop.addEventListener('dragenter', markDrag);
    drop.addEventListener('dragover', markDrag);
    drop.addEventListener('dragleave', clearDrag);
    drop.addEventListener('drop', clearDrag);
    drop.addEventListener('drop', (e) => {
      const dropped = e.dataTransfer?.files?.[0];
      if (!dropped) return;
      fileIn.files = e.dataTransfer.files;
      showPreview(dropped);
    });
  }

  // Click on "recognise & search": OCR -> hints -> wiki search -> render.
  runBtn.addEventListener('click', async (ev) => {
    ev.preventDefault();
    const chosen = fileIn.files && fileIn.files[0];
    if (!chosen) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const text = await runOCR(chosen);
      setStatus('Suche im Wiki …');
      const hits = await searchWikiSmart(extractHints(text), 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  });

  // Guard against overlays covering the button: keep it on top and clickable.
  const wrap = byId('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  Object.assign(runBtn.style, { position: 'relative', zIndex: '9999', pointerEvents: 'auto' });
}
// Initial binding plus fallbacks for UI markup that appears later:
// bind on DOM ready (or immediately when the document is already parsed),
// retry after 250 ms and 1 s, and retry on DOM mutations.
if (document.readyState === 'loading'){
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
setTimeout(bind, 250);
setTimeout(bind, 1000);

// The observer watches the whole document, so it is disconnected as soon as
// binding has succeeded instead of firing on every DOM change for the rest of
// the page's lifetime. bind() never re-binds once BOUND is true, so nothing is lost.
const mo = new MutationObserver(() => {
  if (!BOUND) bind();
  if (BOUND) mo.disconnect();
});
if (!BOUND){
  mo.observe(document.documentElement || document.body, { childList:true, subtree:true });
}
})();