MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 3: | Zeile 3: | ||
'use strict'; | 'use strict'; | ||
// | // ------------------------------------------------------------ | ||
// | // 0) Konfiguration | ||
// = | // ------------------------------------------------------------ | ||
// Debug-Ausgabe der reinen OCR-Texte (Optional: im Browser einstellen) | |||
// window.ADOS_SCAN_DEBUG = true; | |||
// | // In diesen Kategorien sollen Treffer bevorzugt gesucht werden: | ||
const ADOS_CATEGORIES = [ | const ADOS_CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | 'Alle A Dream of Scotland Abfüllungen', | ||
| Zeile 18: | Zeile 20: | ||
]; | ]; | ||
// Distillery-/Marken-Tokens (wird für „hints“ verwendet) | |||
// Distillery / | const KNOWN_TOKENS = [ | ||
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore', | |||
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin', | |||
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower', | |||
'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland' | |||
]; | |||
]; | |||
// ------------------------------------------------------------ | |||
// 1) UI Helpers | |||
// ------------------------------------------------------------ | |||
function hasUI () { | function hasUI () { | ||
return !!document.getElementById('ados-scan-run') && | return !!document.getElementById('ados-scan-run') && | ||
!!document.getElementById('ados-scan-file'); | !!document.getElementById('ados-scan-file'); | ||
} | } | ||
function setStatus (t) { | function setStatus (t) { | ||
var el = document.getElementById('ados-scan-status'); | var el = document.getElementById('ados-scan-status'); | ||
if (el) el.textContent = t || ''; | if (el) el.textContent = t || ''; | ||
} | } | ||
function setProgress (p) { | function setProgress (p) { | ||
var bar = document.getElementById('ados-scan-progress'); | var bar = document.getElementById('ados-scan-progress'); | ||
| Zeile 60: | Zeile 45: | ||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | ||
} | } | ||
function showPreview (file) { | function showPreview (file) { | ||
var url = URL.createObjectURL(file); | var url = URL.createObjectURL(file); | ||
var prev = document.getElementById('ados-scan-preview'); | var prev = document.getElementById('ados-scan-preview'); | ||
if (prev) { | if (prev) { | ||
prev.innerHTML = '<img alt="Vorschau" src="' + url + '">'; | prev.innerHTML = '<img alt="Vorschau" style="max-width:100%;height:auto;border-radius:8px" src="' + url + '">'; | ||
prev.setAttribute('aria-hidden', 'false'); | prev.setAttribute('aria-hidden', 'false'); | ||
} | } | ||
} | } | ||
function esc (s) { return mw.html.escape(String(s || '')); } | |||
// ------------------------------------------------------------ | |||
// 2) Tesseract bei Bedarf laden | |||
// ------------------------------------------------------------ | |||
// | |||
// | |||
var tesseractReady; | var tesseractReady; | ||
function ensureTesseract () { | function ensureTesseract () { | ||
| Zeile 101: | Zeile 80: | ||
} | } | ||
// | // ------------------------------------------------------------ | ||
// | // 3) Bild-Vorverarbeitung | ||
// | // - skalieren | ||
// | // - adaptives Thresholding (besser gegen Glanz/Folie) | ||
// - relative Crops zum Auslesen bestimmter Zonen | |||
// ------------------------------------------------------------ | |||
const | function fixCanvasOrientation(img, maxSide=2200) { | ||
const scale = Math.min(1, maxSide / Math.max(img.width, img.height)); | |||
const w = Math.round(img.width * scale); | |||
const h = Math.round(img.height * scale); | |||
const c = document.createElement('canvas'); | |||
}) | c.width = w; c.height = h; | ||
const ctx = c.getContext('2d'); | |||
const | ctx.imageSmoothingEnabled = true; | ||
const | ctx.drawImage(img, 0, 0, w, h); | ||
const | return c; | ||
} | |||
const | function cropRel(srcCanvas, x, y, w, h) { | ||
const | const sw = srcCanvas.width, sh = srcCanvas.height; | ||
const cx = Math.round(x * sw), cy = Math.round(y * sh); | |||
const cw = Math.round(w * sw), ch = Math.round(h * sh); | |||
const out = document.createElement('canvas'); | |||
out.width = cw; out.height = ch; | |||
const octx = out.getContext('2d'); | |||
octx.drawImage(srcCanvas, cx, cy, cw, ch, 0, 0, cw, ch); | |||
return out; | |||
} | |||
function adaptiveThreshold(srcCanvas) { | |||
const w = srcCanvas.width, h = srcCanvas.height; | |||
const out = document.createElement('canvas'); out.width = w; out.height = h; | |||
const sctx = srcCanvas.getContext('2d'); | |||
const octx = out.getContext('2d'); | |||
const id = sctx.getImageData(0,0,w,h); | |||
const d = id.data; | |||
const gray = new Uint8ClampedArray(w*h); | |||
for (let i=0,j=0;i<d.length;i+=4,++j) { | |||
for (let i=0;i<d.length;i+=4){ | gray[j] = (0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2])|0; | ||
} | } | ||
const S = new Uint32Array((w+1)*(h+1)); | |||
for (let y=1;y<=h;y++) { | |||
let rowsum = 0; | |||
for (let x=1;x<=w;x++) { | |||
const | const v = gray[(y-1)*w + (x-1)]; | ||
rowsum += v; | |||
for (let y=1;y<h | S[y*(w+1)+x] = S[(y-1)*(w+1)+x] + rowsum; | ||
for (let x=1;x<w | |||
const | |||
} | } | ||
} | } | ||
const win = Math.max(15, Math.round(Math.min(w,h)/24)); | |||
const outD = octx.createImageData(w,h); const od = outD.data; | |||
const C = 7; | |||
for (let y=0;y<h;y++) { | |||
const y0 = Math.max(0, y - win), y1 = Math.min(h-1, y + win); | |||
for (let x=0;x<w;x++) { | |||
for (let y=0;y<h;y++){ | const x0 = Math.max(0, x - win), x1 = Math.min(w-1, x + win); | ||
const A = S[y0*(w+1)+x0]; | |||
const B = S[(y1+1)*(w+1)+x0]; | |||
const Cc= S[y0*(w+1)+(x1+1)]; | |||
const Dd= S[(y1+1)*(w+1)+(x1+1)]; | |||
const area = (x1-x0+1)*(y1-y0+1); | |||
const mean = ((Dd + A - B - Cc) / area); | |||
const g = gray[y*w + x]; | |||
const pix = g < (mean - C) ? 0 : 255; | |||
const | const k = (y*w + x)*4; | ||
const | od[k]=od[k+1]=od[k+2]=pix; od[k+3]=255; | ||
const | |||
} | } | ||
} | } | ||
octx.putImageData(outD,0,0); | |||
return out; | |||
return | |||
} | } | ||
async function preprocessImage(file) { | |||
const img = await new Promise((res, rej) => { | |||
function | const o = new Image(); | ||
const | o.onload = () => res(o); | ||
o.onerror = rej; | |||
o.src = URL.createObjectURL(file); | |||
}); | }); | ||
const base = fixCanvasOrientation(img, 2200); | |||
const bin = adaptiveThreshold(base); | |||
return { base, bin }; | |||
} | } | ||
// | // ------------------------------------------------------------ | ||
// | // 4) OCR (Mehrzonen, Whitelists) | ||
// = | // ------------------------------------------------------------ | ||
async function runOCR(file) { | |||
await ensureTesseract(); | |||
setProgress(0); | |||
const { base, bin } = await preprocessImage(file); | |||
const zones = [ | |||
{ name:'header', crop:[0.00,0.00,1.00,0.28], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -&.,’\'' }, | |||
{ name:'body', crop:[0.00,0.28,1.00,0.52], psm:6, whitelist:null }, | |||
{ name:'footer', crop:[0.00,0.80,1.00,0.20], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 %°.,-’\'' }, | |||
]; | |||
const texts = []; | |||
const | let step = 0, total = zones.length*2; | ||
for (const z of zones) { | |||
const cropBin = cropRel(bin, ...z.crop); | |||
for (const | const cropBase = cropRel(base, ...z.crop); | ||
async function pass(canvas) { | |||
const opts = { tessedit_pageseg_mode: z.psm, preserve_interword_spaces: 1 }; | |||
if (z.whitelist) opts.tessedit_char_whitelist = z.whitelist; | |||
const out = await Tesseract.recognize(canvas, 'deu+eng', { | |||
logger: m => { if(m.status==='recognizing text') setProgress((step + m.progress)/total); } | |||
, ...opts }); | |||
step += 1; | |||
return out.data?.text || ''; | |||
} | } | ||
const t1 = await pass(cropBin); | |||
const t2 = await pass(cropBase); | |||
texts.push(t1, t2); | |||
} | } | ||
setProgress(null); | setProgress(null); | ||
const full = texts.join('\n'); | |||
// Optionales Debug auf der Seite | |||
try { | |||
if (window.ADOS_SCAN_DEBUG) { | |||
const box = document.getElementById('ados-scan-ocr'); | |||
if (box) box.textContent = full; | |||
} | |||
} catch (e) {} | |||
return full; | |||
} | } | ||
// | // ------------------------------------------------------------ | ||
// | // 5) Hints extrahieren (mit Normalisierung & Fuzzy-Fixes) | ||
// | // ------------------------------------------------------------ | ||
function extractHints (text) { | function extractHints (text) { | ||
const raw = String(text || '').replace(/\s+/g, ' ').trim(); | const raw = String(text || '').replace(/\s+/g, ' ').trim(); | ||
// | // Aggressive Normalisierung | ||
let norm = raw | |||
.replace(/[“”„‟]/g,'"') | |||
.replace(/[’‘´`]/g,"'") | |||
.replace(/[|]/g,'I') | |||
.replace(/[\u2010-\u2015]/g,'-') | |||
.replace(/\s+/g,' ') | |||
.trim(); | |||
// Häufige Fixes | |||
const fixes = [ | |||
[/T[\s]*A[\s]*S[\s]*T[\s]*E[\s]*F[\s]*U[\s]*L[\s]*8/i, 'The Tasteful 8'], | |||
[/HEROE?S?\s+OF\s+CHILDHOOD/i, 'Heroes of Childhood'], | |||
[/IR(E|I)LAND/i, 'Ireland'], | |||
[/O?LOROSO/i, 'Oloroso'], | |||
[/PX/i, 'PX'], | |||
[/1ST\s*FILL/i, '1st Fill'], | |||
[/\b([12][0-9])\s*(?:Y(?:EARS?)?|YO|JAHRE?)\b/ig, (m,p)=>`${p} Years`], | |||
]; | |||
for (const [re, rep] of fixes) norm = norm.replace(re, rep); | |||
// Tokens, die im Text vorkommen | |||
const foundNames = []; | const foundNames = []; | ||
KNOWN_TOKENS.forEach(t => { | KNOWN_TOKENS.forEach(t => { | ||
const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i'); | const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i'); | ||
if (re.test( | if (re.test(norm)) foundNames.push(t); | ||
}); | }); | ||
// Serien | |||
if (/The Tasteful 8/i.test(norm) && !foundNames.includes('The Tasteful 8')) foundNames.push('The Tasteful 8'); | |||
if (/Heroes of Childhood/i.test(norm) && !foundNames.includes('Heroes of Childhood')) foundNames.push('Heroes of Childhood'); | |||
if (/Ireland/i.test(norm) && !foundNames.includes('Ireland')) foundNames.push('Ireland'); | |||
// Alter | |||
const ages = []; | const ages = []; | ||
let m; | |||
const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi; | ||
while ((m = ageRe.exec(norm)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); } | |||
while ((m = ageRe.exec( | |||
// Jahrgänge | |||
const years = []; | const years = []; | ||
const yearRe = /\b(19|20)\d{2}\b/g; | const yearRe = /\b(19|20)\d{2}\b/g; | ||
while ((m = yearRe.exec( | while ((m = yearRe.exec(norm)) !== null) { if (!years.includes(m[0])) years.push(m[0]); } | ||
// ein paar markante Wörter | |||
const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g; | ||
const uniq = new Set(); let w; const words = []; | const uniq = new Set(); let w; const words = []; | ||
while ((w = wordRe.exec( | while ((w = wordRe.exec(norm)) !== null) { | ||
const s = w[0]; | const s = w[0]; | ||
if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; } | if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; } | ||
} | } | ||
return { names: foundNames, ages, years, words, raw } | return { names: foundNames, ages, years, words, raw: norm }; | ||
} | } | ||
// ------------------------------------------------------------ | |||
// 6) Suche im Wiki (3 Pässe) | |||
// ------------------------------------------------------------ | |||
async function searchWikiSmart (hints, limit) { | async function searchWikiSmart (hints, limit) { | ||
await mw.loader.using( | await mw.loader.using('mediawiki.api'); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const ns0 = 0; | const ns0 = 0; | ||
const MAX = limit || 12; | const MAX = limit || 12; | ||
function incatStr () { | |||
return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' '); | |||
} | |||
const pass1 = []; | const pass1 = []; | ||
if (hints.names.length) { | if (hints.names.length) { | ||
| Zeile 302: | Zeile 296: | ||
} | } | ||
const key = [] | const key = [] | ||
.concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3)) | .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3)) | ||
| Zeile 308: | Zeile 301: | ||
const pass2 = key ? [ `${key} ${incatStr()}` ] : []; | const pass2 = key ? [ `${key} ${incatStr()}` ] : []; | ||
const pass3 = []; | const pass3 = []; | ||
if (hints.names.length) pass3.push(hints.names[0]); | if (hints.names.length) pass3.push(hints.names[0]); | ||
| Zeile 328: | Zeile 320: | ||
for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | ||
for (const p of pass3) { | for (const p of pass3) { | ||
const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | ||
| Zeile 344: | Zeile 335: | ||
} | } | ||
// | // ------------------------------------------------------------ | ||
// 7) Treffer rendern | |||
// ------------------------------------------------------------ | |||
// | |||
function renderResults (items) { | function renderResults (items) { | ||
var box = document.getElementById('ados-scan-results'); | var box = document.getElementById('ados-scan-results'); | ||
| Zeile 413: | Zeile 359: | ||
} | } | ||
// | // ------------------------------------------------------------ | ||
// | // 8) Bindings (Buttons, Dropzone, Fallbacks) | ||
// | // ------------------------------------------------------------ | ||
var BOUND = false; | var BOUND = false; | ||
function bind () { | function bind () { | ||
if (BOUND || !hasUI()) return; | if (BOUND || !hasUI()) return; | ||
var runBtn = document.getElementById('ados-scan-run'); | var runBtn = document.getElementById('ados-scan-run'); | ||
var fileIn = document.getElementById('ados-scan-file'); | var fileIn = document.getElementById('ados-scan-file'); | ||
var bigBtn = document.getElementById('ados-scan-bigbtn'); | var bigBtn = document.getElementById('ados-scan-bigbtn'); | ||
var | var drop = document.getElementById('ados-scan-drop'); | ||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
| Zeile 435: | Zeile 379: | ||
}); | }); | ||
function | // Drag&Drop | ||
if (drop) { | |||
['dragenter','dragover'].forEach(ev => | |||
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.add('is-over'); })); | |||
['dragleave','drop'].forEach(ev => | |||
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.remove('is-over'); })); | |||
drop.addEventListener('drop', e => { | |||
const f = e.dataTransfer?.files?.[0]; | |||
if (f) { fileIn.files = e.dataTransfer.files; showPreview(f); } | |||
}); | |||
} | |||
runBtn.addEventListener('click', async function (ev) { | |||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
var f = fileIn.files[0]; | var f = fileIn.files[0]; | ||
try { | |||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | |||
setStatus('Erkenne Label …'); | |||
var text = await runOCR(f); | |||
if (window.ADOS_SCAN_DEBUG) { | |||
const dbg = document.getElementById('ados-scan-ocr'); | |||
if (dbg) dbg.textContent = text; | |||
} | } | ||
})(); | setStatus('Suche im Wiki …'); | ||
} | var hints = extractHints(text); | ||
var hits = await searchWikiSmart(hints, 12); | |||
renderResults(hits); | |||
setStatus('Fertig.'); | |||
} catch (e) { | |||
console.error('[LabelScan]', e); | |||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | |||
} finally { | |||
runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen'; | |||
} | |||
}); | |||
// Sicherheit gegen Overlays | |||
// Sicherheit | |||
var wrap = document.getElementById('ados-labelscan'); | var wrap = document.getElementById('ados-labelscan'); | ||
if (wrap) wrap.style.position = 'relative'; | if (wrap) wrap.style.position = 'relative'; | ||
| Zeile 481: | Zeile 424: | ||
} | } | ||
// initial & Fallback-Bindings | |||
if (document.readyState === 'loading') { | if (document.readyState === 'loading') { | ||
document.addEventListener('DOMContentLoaded', bind); | document.addEventListener('DOMContentLoaded', bind); | ||
| Zeile 490: | Zeile 434: | ||
var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | ||
mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | ||
})(); | })(); | ||
Version vom 6. November 2025, 00:40 Uhr
/* global mw, Tesseract */
(function () {
'use strict';
// ------------------------------------------------------------
// 0) Konfiguration
// ------------------------------------------------------------
// Debug-Ausgabe der reinen OCR-Texte (Optional: im Browser einstellen)
// window.ADOS_SCAN_DEBUG = true;
// Wiki categories used to build the `incategory:"…"` search filters in
// searchWikiSmart(). The names must match the wiki's category titles exactly
// (without the "Category:" prefix).
const ADOS_CATEGORIES = [
'Alle A Dream of Scotland Abfüllungen',
'Alle A Dream of Ireland Abfüllungen',
'Alle A Dream of... – Der Rest der Welt Abfüllungen',
'Friendly Mr. Z Whiskytainment Abfüllungen',
'Die Whisky Elfen Abfüllungen',
'The Fine Art of Whisky Abfüllungen',
'Alle Rumbastic Abfüllungen'
];
// Distillery / brand / region tokens. extractHints() looks for each entry as
// a whole word (case-insensitive) in the OCR text and reports matches as
// `names` hints.
const KNOWN_TOKENS = [
'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland'
];
// ------------------------------------------------------------
// 1) UI Helpers
// ------------------------------------------------------------
/**
 * Tells whether the two mandatory scan controls (run button and file input)
 * are currently present in the document.
 *
 * @returns {boolean} true only when both elements exist.
 */
function hasUI() {
  const requiredIds = ['ados-scan-run', 'ados-scan-file'];
  return requiredIds.every((id) => Boolean(document.getElementById(id)));
}
/**
 * Writes a status message into the `#ados-scan-status` element.
 * Does nothing when that element is absent.
 *
 * @param {string} [t] Message to show; any falsy value clears the text.
 */
function setStatus(t) {
  const statusEl = document.getElementById('ados-scan-status');
  if (!statusEl) return;
  statusEl.textContent = t || '';
}
/**
 * Updates the `#ados-scan-progress` element.
 *
 * @param {?number} p Fraction done; it is clamped to [0, 1]. Passing
 *   null/undefined hides the bar and resets its value to 0.
 */
function setProgress(p) {
  const meter = document.getElementById('ados-scan-progress');
  if (!meter) return;

  const idle = p == null; // loose comparison on purpose: matches null and undefined
  meter.hidden = idle;
  meter.value = idle ? 0 : Math.max(0, Math.min(1, p));
}
/**
 * Shows a preview of the chosen image inside `#ados-scan-preview`.
 *
 * The image is built with DOM APIs instead of an HTML string, and the
 * temporary object URL is released as soon as the browser has finished
 * loading (or failed to load) the picture, so repeated selections do not
 * accumulate blob URLs. No object URL is created when the preview container
 * is missing.
 *
 * @param {Blob} file Image file picked or dropped by the user.
 */
function showPreview(file) {
  const prev = document.getElementById('ados-scan-preview');
  if (!prev) return;

  const url = URL.createObjectURL(file);
  const release = () => URL.revokeObjectURL(url);

  const img = document.createElement('img');
  img.alt = 'Vorschau';
  img.style.cssText = 'max-width:100%;height:auto;border-radius:8px';
  // The decoded image stays visible after the URL is revoked.
  img.onload = release;
  img.onerror = release;
  img.src = url;

  prev.textContent = ''; // drop any previous preview
  prev.appendChild(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * HTML-escapes a value via `mw.html.escape`.
 *
 * @param {*} s Value to escape; falsy values are treated as the empty string.
 * @returns {string} Escaped text.
 */
function esc(s) {
  const text = String(s || '');
  return mw.html.escape(text);
}
// ------------------------------------------------------------
// 2) Tesseract bei Bedarf laden
// ------------------------------------------------------------
// Cached promise of the in-flight or completed Tesseract load.
var tesseractReady;

/**
 * Loads tesseract.js on demand, trying each CDN in order.
 *
 * The returned promise is cached so concurrent callers share one load. When
 * every source fails the cache is cleared again, so a later call (e.g. the
 * user clicking the button once more) retries instead of getting the old
 * rejection forever.
 *
 * @returns {Promise<void>} Resolves once `window.Tesseract` is expected to be
 *   available; rejects with an Error when no source could be loaded.
 */
function ensureTesseract() {
  if (tesseractReady) return tesseractReady;

  const sources = [
    'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
    'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js',
  ];

  // Injects one <script> tag and settles when it loads or errors.
  const loadScript = (src) => new Promise((resolve, reject) => {
    const tag = document.createElement('script');
    tag.src = src;
    tag.async = true;
    tag.onload = () => resolve();
    tag.onerror = () => reject(new Error('Script konnte nicht geladen werden: ' + src));
    document.head.appendChild(tag);
  });

  const pending = (async () => {
    if (window.Tesseract) return;
    for (const src of sources) {
      try {
        await loadScript(src);
        return;
      } catch (err) {
        // Fall through to the next mirror; the final failure is reported below.
      }
    }
    throw new Error('Tesseract konnte nicht geladen werden');
  })();

  tesseractReady = pending;
  // Forget a failed attempt so the next call can retry. The caller still
  // receives the rejection through `pending` itself.
  pending.catch(() => {
    if (tesseractReady === pending) tesseractReady = undefined;
  });
  return pending;
}
// ------------------------------------------------------------
// 3) Bild-Vorverarbeitung
// - skalieren
// - adaptives Thresholding (besser gegen Glanz/Folie)
// - relative Crops zum Auslesen bestimmter Zonen
// ------------------------------------------------------------
/**
 * Draws an image onto a fresh canvas, shrinking it so that its longer side
 * does not exceed `maxSide`. Images that already fit are copied at their
 * original size (never upscaled). Despite the name, no rotation or EXIF
 * handling happens here.
 *
 * @param {{width:number,height:number}} img Drawable image source.
 * @param {number} [maxSide=2200] Upper bound for the longer side, in pixels.
 * @returns {HTMLCanvasElement} Canvas holding the (possibly downscaled) image.
 */
function fixCanvasOrientation(img, maxSide = 2200) {
  const longestSide = Math.max(img.width, img.height);
  const factor = Math.min(1, maxSide / longestSide);
  const targetW = Math.round(img.width * factor);
  const targetH = Math.round(img.height * factor);

  const canvas = document.createElement('canvas');
  canvas.width = targetW;
  canvas.height = targetH;

  const pen = canvas.getContext('2d');
  pen.imageSmoothingEnabled = true;
  pen.drawImage(img, 0, 0, targetW, targetH);
  return canvas;
}
/**
 * Copies a rectangular region of a canvas into a new canvas. The region is
 * given in fractions of the source size and rounded to whole pixels.
 *
 * @param {HTMLCanvasElement} srcCanvas Source canvas.
 * @param {number} x Left edge as a fraction of the source width.
 * @param {number} y Top edge as a fraction of the source height.
 * @param {number} w Region width as a fraction of the source width.
 * @param {number} h Region height as a fraction of the source height.
 * @returns {HTMLCanvasElement} New canvas sized to the cropped region.
 */
function cropRel(srcCanvas, x, y, w, h) {
  const { width: fullW, height: fullH } = srcCanvas;
  const region = {
    left: Math.round(x * fullW),
    top: Math.round(y * fullH),
    width: Math.round(w * fullW),
    height: Math.round(h * fullH),
  };

  const target = document.createElement('canvas');
  target.width = region.width;
  target.height = region.height;
  target.getContext('2d').drawImage(
    srcCanvas,
    region.left, region.top, region.width, region.height,
    0, 0, region.width, region.height
  );
  return target;
}
/**
 * Produces a black/white copy of a canvas using a local mean threshold.
 *
 * Each pixel is converted to luma, then compared with the average luma of a
 * square window around it (computed in constant time per pixel through a
 * summed-area table). Pixels darker than `mean - 7` become black, everything
 * else white; alpha is always opaque.
 *
 * @param {HTMLCanvasElement} srcCanvas Source canvas (read only).
 * @returns {HTMLCanvasElement} New canvas of the same size with the binarised image.
 */
function adaptiveThreshold(srcCanvas) {
  const w = srcCanvas.width;
  const h = srcCanvas.height;

  const out = document.createElement('canvas');
  out.width = w;
  out.height = h;
  const srcCtx = srcCanvas.getContext('2d');
  const outCtx = out.getContext('2d');
  const rgba = srcCtx.getImageData(0, 0, w, h).data;

  // 1) Luma per pixel (Rec. 709 weights), truncated to an integer.
  const luma = new Uint8ClampedArray(w * h);
  for (let p = 0, o = 0; o < rgba.length; p += 1, o += 4) {
    luma[p] = (0.2126 * rgba[o] + 0.7152 * rgba[o + 1] + 0.0722 * rgba[o + 2]) | 0;
  }

  // 2) Summed-area table with one extra zero row/column at the top/left.
  const stride = w + 1;
  const table = new Uint32Array(stride * (h + 1));
  for (let row = 0; row < h; row += 1) {
    let running = 0;
    for (let col = 0; col < w; col += 1) {
      running += luma[row * w + col];
      table[(row + 1) * stride + (col + 1)] = table[row * stride + (col + 1)] + running;
    }
  }
  const cell = (col, row) => table[row * stride + col];

  // 3) Compare each pixel with its neighbourhood mean.
  const radius = Math.max(15, Math.round(Math.min(w, h) / 24));
  const OFFSET = 7;
  const result = outCtx.createImageData(w, h);
  const px = result.data;
  px.fill(255); // start all-white and opaque, then punch in the dark pixels

  for (let y = 0; y < h; y += 1) {
    const top = Math.max(0, y - radius);
    const bottom = Math.min(h - 1, y + radius);
    for (let x = 0; x < w; x += 1) {
      const left = Math.max(0, x - radius);
      const right = Math.min(w - 1, x + radius);
      const sum = cell(right + 1, bottom + 1) + cell(left, top)
        - cell(left, bottom + 1) - cell(right + 1, top);
      const count = (right - left + 1) * (bottom - top + 1);
      const mean = sum / count;
      if (luma[y * w + x] < mean - OFFSET) {
        const k = (y * w + x) * 4;
        px[k] = 0;
        px[k + 1] = 0;
        px[k + 2] = 0;
      }
    }
  }

  outCtx.putImageData(result, 0, 0);
  return out;
}
/**
 * Decodes an image file and prepares the two canvases used for OCR.
 *
 * The temporary object URL is always revoked once the image has been drawn
 * (or failed to load), so repeated scans do not leak blob URLs. A decode
 * failure rejects with a real Error instead of a raw DOM event.
 *
 * @param {Blob} file Image file chosen by the user.
 * @returns {Promise<{base: HTMLCanvasElement, bin: HTMLCanvasElement}>}
 *   `base` is the downscaled colour image, `bin` its thresholded version.
 */
async function preprocessImage(file) {
  const url = URL.createObjectURL(file);
  try {
    const img = await new Promise((resolve, reject) => {
      const el = new Image();
      el.onload = () => resolve(el);
      el.onerror = () => reject(new Error('Bild konnte nicht geladen werden'));
      el.src = url;
    });
    const base = fixCanvasOrientation(img, 2200);
    const bin = adaptiveThreshold(base);
    return { base, bin };
  } finally {
    // Safe here: the pixels have already been copied onto `base`.
    URL.revokeObjectURL(url);
  }
}
// ------------------------------------------------------------
// 4) OCR (Mehrzonen, Whitelists)
// ------------------------------------------------------------
/**
 * Runs OCR over three horizontal zones of the label (header, body, footer),
 * once on the thresholded image and once on the colour image per zone, and
 * returns all recognised text joined by newlines.
 *
 * Differences from a plain `Tesseract.recognize()` call per pass:
 *  - A single worker is created and reused for all passes instead of
 *    spinning up (and re-initialising) one worker per pass.
 *  - Page segmentation mode, whitelist and interword-space settings are
 *    applied through `worker.setParameters()`, which is where tesseract.js
 *    expects engine parameters.
 *  - The progress bar is hidden and the worker terminated even when a step
 *    throws.
 *
 * @param {Blob} file Image file to scan.
 * @returns {Promise<string>} Concatenated OCR text of all passes.
 */
async function runOCR(file) {
  await ensureTesseract();
  setProgress(0);

  // crop = [x, y, width, height] as fractions of the full image.
  const zones = [
    { name:'header', crop:[0.00,0.00,1.00,0.28], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -&.,’\'' },
    { name:'body', crop:[0.00,0.28,1.00,0.52], psm:6, whitelist:null },
    { name:'footer', crop:[0.00,0.80,1.00,0.20], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 %°.,-’\'' },
  ];
  const total = zones.length * 2; // two passes (binarised + colour) per zone
  const texts = [];
  let step = 0;
  let worker = null;

  try {
    const { base, bin } = await preprocessImage(file);

    // OEM 1 = LSTM only (the tesseract.js default engine mode).
    worker = await Tesseract.createWorker('deu+eng', 1, {
      logger: (m) => {
        if (m.status === 'recognizing text') setProgress((step + m.progress) / total);
      },
    });

    for (const z of zones) {
      // Parameters persist on the worker, so the whitelist is reset to ''
      // (no restriction) for zones that do not define one.
      await worker.setParameters({
        tessedit_pageseg_mode: String(z.psm),
        preserve_interword_spaces: '1',
        tessedit_char_whitelist: z.whitelist || '',
      });
      for (const canvas of [cropRel(bin, ...z.crop), cropRel(base, ...z.crop)]) {
        const out = await worker.recognize(canvas);
        step += 1;
        texts.push(out.data?.text || '');
      }
    }
  } finally {
    setProgress(null);
    if (worker) {
      try {
        await worker.terminate();
      } catch (err) {
        // Cleanup failure must not mask the real result or error.
        console.warn('[LabelScan] worker.terminate() failed', err);
      }
    }
  }

  const full = texts.join('\n');

  // Optional on-page debug output of the raw OCR text.
  if (window.ADOS_SCAN_DEBUG) {
    const box = document.getElementById('ados-scan-ocr');
    if (box) box.textContent = full;
  }
  return full;
}
// ------------------------------------------------------------
// 5) Hints extrahieren (mit Normalisierung & Fuzzy-Fixes)
// ------------------------------------------------------------
/**
 * Derives search hints from raw OCR text.
 *
 * The text is first normalised (quotes, dashes, `|` -> `I`, whitespace) and
 * run through a list of OCR-typo fixes. Every fix is applied to ALL
 * occurrences: the OCR text contains two passes per zone, so most phrases
 * appear more than once.
 *
 * @param {string} text Raw OCR output.
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names - matched KNOWN_TOKENS entries plus recognised series names;
 *   ages  - distinct age statements ("12" from "12 Years"), in order of appearance;
 *   years - distinct four-digit years 19xx/20xx;
 *   words - up to 8 distinct capitalised words of 4+ characters;
 *   raw   - the normalised text the hints were extracted from.
 */
function extractHints(text) {
  let norm = String(text || '')
    .replace(/[“”„‟]/g, '"')
    .replace(/[’‘´`]/g, "'")
    .replace(/[|]/g, 'I')
    .replace(/[\u2010-\u2015]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();

  // Common OCR misreads / spelling variants -> canonical spelling.
  const fixes = [
    // Letters are often spaced out on the label; a leading "THE" is swallowed
    // so the result is not "THE The Tasteful 8".
    [/(?:\bTHE\s+)?T\s*A\s*S\s*T\s*E\s*F\s*U\s*L\s*8/gi, 'The Tasteful 8'],
    [/HEROE?S?\s+OF\s+CHILDHOOD/gi, 'Heroes of Childhood'],
    [/IR[EI]LAND/gi, 'Ireland'],
    [/O?LOROSO/gi, 'Oloroso'],
    // Whole word only, so "px" inside another word is left alone.
    [/\bPX\b/gi, 'PX'],
    [/1ST\s*FILL/gi, '1st Fill'],
    [/\b([12][0-9])\s*(?:Y(?:EARS?)?|YO|JAHRE?)\b/gi, (m, p) => `${p} Years`],
  ];
  for (const [re, rep] of fixes) norm = norm.replace(re, rep);

  // Known distillery / region tokens present in the text (whole word, any case).
  const escapeRe = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const names = KNOWN_TOKENS.filter(
    (t) => new RegExp('\\b' + escapeRe(t) + '\\b', 'i').test(norm)
  );

  // Series names that are not part of KNOWN_TOKENS.
  const series = [
    [/The Tasteful 8/i, 'The Tasteful 8'],
    [/Heroes of Childhood/i, 'Heroes of Childhood'],
    [/Ireland/i, 'Ireland'],
  ];
  for (const [re, label] of series) {
    if (re.test(norm) && !names.includes(label)) names.push(label);
  }

  // Age statements and vintages, de-duplicated in order of appearance.
  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  const ages = [...new Set([...norm.matchAll(ageRe)].map((m) => m[1]))];

  const yearRe = /\b(19|20)\d{2}\b/g;
  const years = [...new Set([...norm.matchAll(yearRe)].map((m) => m[0]))];

  // A handful of distinctive capitalised words as a last-resort search key.
  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const distinctWords = new Set();
  for (const m of norm.matchAll(wordRe)) {
    distinctWords.add(m[0]);
    if (distinctWords.size >= 8) break;
  }

  return { names, ages, years, words: [...distinctWords], raw: norm };
}
// ------------------------------------------------------------
// 6) Suche im Wiki (3 Pässe)
// ------------------------------------------------------------
/**
 * Searches the wiki's main namespace for pages matching the OCR hints.
 *
 * Three passes are tried in order, stopping as soon as `limit` distinct
 * titles have been collected:
 *   1. title-focused full-text queries per known name (with age / year),
 *   2. one combined phrase query built from the strongest hints,
 *   3. a prefix search on the first name (or, failing that, the first word).
 *
 * NOTE(review): the category filter joins several `incategory:"…"` terms with
 * spaces. Depending on the search backend this may require a page to be in
 * ALL listed categories - confirm this is the intended behaviour.
 *
 * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
 * @param {number} [limit] Maximum number of results; falsy values mean 12.
 * @returns {Promise<Array<{title: string, snippet?: string}>>} Full-text hits
 *   are returned as delivered by the API; prefix hits as `{title, snippet: ''}`.
 */
async function searchWikiSmart(hints, limit) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const MAIN_NS = 0;
  const cap = limit || 12;

  const categoryFilter = ADOS_CATEGORIES.map((c) => 'incategory:"' + c + '"').join(' ');

  // Pass 1: per name -> name+age, name+year, name alone.
  const titleQueries = hints.names.flatMap((n) => [
    ...hints.ages.map((a) => `intitle:"${n}" intitle:${a} ${categoryFilter}`),
    ...hints.years.map((y) => `intitle:"${n}" "${y}" ${categoryFilter}`),
    `intitle:"${n}" ${categoryFilter}`,
  ]);

  // Pass 2: a single query of quoted key terms.
  const phrase = [
    ...hints.names.slice(0, 2),
    ...hints.ages.slice(0, 1),
    ...hints.years.slice(0, 1),
    ...hints.words.slice(0, 3),
  ].map((term) => `"${term}"`).join(' ');
  const fullTextQueries = phrase
    ? [...titleQueries, `${phrase} ${categoryFilter}`]
    : titleQueries;

  // Pass 3: prefix search term (at most one).
  const prefixTerms = [];
  if (hints.names.length) {
    prefixTerms.push(hints.names[0]);
  } else if (hints.words.length) {
    prefixTerms.push(hints.words[0]);
  }

  const seenTitles = new Set();
  const results = [];
  const addUnique = (title, entry) => {
    if (seenTitles.has(title)) return;
    seenTitles.add(title);
    results.push(entry);
  };

  for (const query of fullTextQueries) {
    const reply = await api.get({
      action: 'query',
      list: 'search',
      srsearch: query,
      srnamespace: MAIN_NS,
      srlimit: cap,
      formatversion: 2,
    });
    for (const hit of reply.query?.search || []) addUnique(hit.title, hit);
    if (results.length >= cap) return results.slice(0, cap);
  }

  for (const term of prefixTerms) {
    const reply = await api.get({
      action: 'query',
      list: 'prefixsearch',
      pssearch: term,
      psnamespace: MAIN_NS,
      pslimit: cap,
    });
    for (const hit of reply.query?.prefixsearch || []) {
      const title = hit.title || hit['*']; // legacy result format uses '*'
      addUnique(title, { title, snippet: '' });
    }
    if (results.length >= cap) break;
  }

  return results.slice(0, cap);
}
// ------------------------------------------------------------
// 7) Treffer rendern
// ------------------------------------------------------------
/**
 * Renders up to 12 search hits into `#ados-scan-results`.
 *
 * Each hit becomes a linked title plus, when available, the search snippet.
 * Titles and links are inserted through DOM properties (never through an
 * HTML string). Snippets come from the search API as HTML; every tag is
 * stripped before insertion so that only text and character entities remain.
 *
 * @param {?Array<{title?: string, snippet?: string}>} items Search hits.
 */
function renderResults(items) {
  const box = document.getElementById('ados-scan-results');
  if (!box) return;
  box.textContent = ''; // remove previous results

  if (!items || !items.length) {
    const empty = document.createElement('div');
    empty.className = 'ados-hit';
    empty.textContent = 'Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.';
    box.appendChild(empty);
    return;
  }

  for (const it of items.slice(0, 12)) {
    const title = it.title || '';

    const link = document.createElement('a');
    link.href = mw.util.getUrl(title.replace(/ /g, '_'));
    link.textContent = title;

    const heading = document.createElement('b');
    heading.appendChild(link);

    const hit = document.createElement('div');
    hit.className = 'ados-hit';
    hit.appendChild(heading);

    // Drop all markup (highlight spans and anything unexpected). innerHTML
    // is still used for the remainder so entities such as &amp; decode.
    const snippet = String(it.snippet || '').replace(/<[^>]*>/g, '');
    if (snippet) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.innerHTML = snippet;
      hit.appendChild(meta);
    }

    box.appendChild(hit);
  }
}
// ------------------------------------------------------------
// 8) Bindings (Buttons, Dropzone, Fallbacks)
// ------------------------------------------------------------
// Set once the handlers have been attached; checked by the fallback triggers.
var BOUND = false;

/**
 * Wires up the scan widget exactly once: file picker button, preview on file
 * change, drag & drop, and the "recognise & search" button. Safe to call
 * repeatedly - it returns early when the UI is missing or already bound.
 */
function bind() {
  if (BOUND || !hasUI()) return;

  const byId = (id) => document.getElementById(id);
  const runBtn = byId('ados-scan-run');
  const fileIn = byId('ados-scan-file');
  const bigBtn = byId('ados-scan-bigbtn');
  const drop = byId('ados-scan-drop');

  if (!runBtn || !fileIn) return;
  // The data attribute guards against a second script instance binding too.
  if (runBtn.dataset.bound === '1') return;
  runBtn.dataset.bound = '1';
  BOUND = true;

  // Large button simply opens the native file picker.
  if (bigBtn) {
    bigBtn.addEventListener('click', () => {
      fileIn.click();
    });
  }

  fileIn.addEventListener('change', () => {
    const picked = fileIn.files && fileIn.files[0];
    if (picked) showPreview(picked);
  });

  // Drag & drop
  if (drop) {
    const setHover = (isOver) => (e) => {
      e.preventDefault();
      if (isOver) {
        drop.classList.add('is-over');
      } else {
        drop.classList.remove('is-over');
      }
    };
    drop.addEventListener('dragenter', setHover(true));
    drop.addEventListener('dragover', setHover(true));
    drop.addEventListener('dragleave', setHover(false));
    drop.addEventListener('drop', setHover(false));
    drop.addEventListener('drop', (e) => {
      const dropped = e.dataTransfer?.files?.[0];
      if (!dropped) return;
      fileIn.files = e.dataTransfer.files;
      showPreview(dropped);
    });
  }

  // Full pipeline: OCR -> hints -> wiki search -> render.
  async function onRunClick(ev) {
    ev.preventDefault();
    if (!(fileIn.files && fileIn.files[0])) {
      alert('Bitte ein Foto auswählen oder aufnehmen.');
      return;
    }
    const photo = fileIn.files[0];
    try {
      runBtn.disabled = true;
      runBtn.textContent = 'Erkenne …';
      setStatus('Erkenne Label …');
      const ocrText = await runOCR(photo);
      if (window.ADOS_SCAN_DEBUG) {
        const debugBox = document.getElementById('ados-scan-ocr');
        if (debugBox) debugBox.textContent = ocrText;
      }
      setStatus('Suche im Wiki …');
      const hints = extractHints(ocrText);
      const hits = await searchWikiSmart(hints, 12);
      renderResults(hits);
      setStatus('Fertig.');
    } catch (e) {
      console.error('[LabelScan]', e);
      setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      runBtn.disabled = false;
      runBtn.textContent = '🔍 Erkennen & suchen';
    }
  }
  runBtn.addEventListener('click', onRunClick);

  // Keep the button clickable even if another element overlays the widget.
  const wrap = document.getElementById('ados-labelscan');
  if (wrap) wrap.style.position = 'relative';
  Object.assign(runBtn.style, {
    position: 'relative',
    zIndex: '9999',
    pointerEvents: 'auto',
  });
}
// initial & Fallback-Bindings
// Try to bind right away (or as soon as the DOM is parsed) ...
if (document.readyState === 'loading') {
  document.addEventListener('DOMContentLoaded', bind);
} else {
  bind();
}
// ... and again shortly afterwards, in case the widget markup is inserted late.
setTimeout(bind, 250);
setTimeout(bind, 1000);

// Last resort: watch the document for the widget being added dynamically.
// The observer is only installed while unbound and disconnects itself as
// soon as binding succeeds, so it does not keep firing on every DOM mutation
// for the rest of the page's lifetime.
if (!BOUND) {
  const lateBindObserver = new MutationObserver(function () {
    if (!BOUND) bind();
    if (BOUND) lateBindObserver.disconnect();
  });
  lateBindObserver.observe(document.documentElement || document.body, { childList: true, subtree: true });
}
})();