MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 3: | Zeile 3: | ||
'use strict'; | 'use strict'; | ||
// ------------------------------------------------------------
// 0) Konfiguration
// ------------------------------------------------------------
// Debug-Ausgabe der reinen OCR-Texte (Optional: im Browser einstellen)
// window.ADOS_SCAN_DEBUG = true;
// In diesen Kategorien sollen Treffer bevorzugt gesucht werden:
const ADOS_CATEGORIES = [ | const ADOS_CATEGORIES = [ | ||
'Alle A Dream of Scotland Abfüllungen', | 'Alle A Dream of Scotland Abfüllungen', | ||
| Zeile 18: | Zeile 20: | ||
]; | ]; | ||
// Distillery / brand / region tokens. extractHints() tests the OCR text
// against each entry (whole-word, case-insensitive) to build its `names` list.
const KNOWN_TOKENS = [
  'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
  'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
  'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
  'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland'
];
// ------------------------------------------------------------ | |||
// 1) UI Helpers | |||
// ------------------------------------------------------------ | |||
/**
 * Reports whether the scan widget is present in the current document.
 *
 * @returns {boolean} true only when both the run button
 *   (`#ados-scan-run`) and the file input (`#ados-scan-file`) exist.
 */
function hasUI () {
  return !!document.getElementById('ados-scan-run') &&
    !!document.getElementById('ados-scan-file');
}
/**
 * Writes a status message into `#ados-scan-status`.
 * Does nothing when that element is not in the document.
 *
 * @param {string} [t] - Message to show; any falsy value clears the text.
 */
function setStatus (t) {
  var el = document.getElementById('ados-scan-status');
  if (el) el.textContent = t || '';
}
function setProgress (p) { | function setProgress (p) { | ||
var bar = document.getElementById('ados-scan-progress'); | var bar = document.getElementById('ados-scan-progress'); | ||
| Zeile 60: | Zeile 45: | ||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); } | ||
} | } | ||
/**
 * Shows a preview of the selected image inside `#ados-scan-preview`.
 *
 * The image is displayed through a temporary object URL. That URL is
 * released as soon as the browser has loaded (or failed to load) the image,
 * so repeated selections do not accumulate blob references. No URL is
 * created at all when the preview container is missing.
 *
 * @param {Blob|File} file - Image chosen by the user.
 */
function showPreview (file) {
  var prev = document.getElementById('ados-scan-preview');
  if (!prev) return;

  var url = URL.createObjectURL(file);
  var img = document.createElement('img');
  img.alt = 'Vorschau';
  img.setAttribute('style', 'max-width:100%;height:auto;border-radius:8px');

  // Once loaded, the <img> no longer needs the blob URL.
  var release = function () { URL.revokeObjectURL(url); };
  img.addEventListener('load', release, { once: true });
  img.addEventListener('error', release, { once: true });
  img.src = url;

  // Replace any earlier preview instead of building markup via innerHTML.
  prev.replaceChildren(img);
  prev.setAttribute('aria-hidden', 'false');
}
/**
 * HTML-escapes a value via `mw.html.escape`.
 *
 * Only null/undefined become the empty string; other falsy values such as
 * `0` are stringified and kept (previously `s || ''` dropped them).
 *
 * @param {*} s - Value to escape.
 * @returns {string} Escaped text.
 */
function esc (s) { return mw.html.escape(String(s ?? '')); }
// ------------------------------------------------------------ | |||
// 2) Tesseract bei Bedarf laden | |||
// ------------------------------------------------------------ | |||
// | |||
// | |||
var tesseractReady; | var tesseractReady; | ||
function ensureTesseract () { | function ensureTesseract () { | ||
| Zeile 101: | Zeile 80: | ||
} | } | ||
// ------------------------------------------------------------
// 3) Bild-Vorverarbeitung
// - skalieren
// - adaptives Thresholding (besser gegen Glanz/Folie)
// - relative Crops zum Auslesen bestimmter Zonen
// ------------------------------------------------------------
/**
 * Draws an image onto a new canvas, downscaling it so that its longer side
 * is at most `maxSide` pixels. Images that are already small enough are
 * copied at their original size (never upscaled).
 *
 * NOTE(review): despite the name, this function does not rotate anything;
 * it only scales. Any orientation handling must come from elsewhere.
 *
 * @param {{width: number, height: number}} img - Drawable image source.
 * @param {number} [maxSide=2200] - Upper bound for the longer side in px.
 * @returns {HTMLCanvasElement} Canvas holding the (possibly scaled) image.
 */
function fixCanvasOrientation(img, maxSide=2200) {
  const scale = Math.min(1, maxSide / Math.max(img.width, img.height));
  // Clamp to 1px: an extreme aspect ratio (e.g. 10000x1) would otherwise
  // round the short side to 0 and make a later getImageData() throw.
  const w = Math.max(1, Math.round(img.width * scale));
  const h = Math.max(1, Math.round(img.height * scale));
  const c = document.createElement('canvas');
  c.width = w; c.height = h;
  const ctx = c.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);
  return c;
}
/**
 * Cuts a rectangle out of a canvas. The rectangle is given in coordinates
 * relative to the source size (0..1 on both axes).
 *
 * The rectangle is clamped to the source bounds and to a minimum of 1x1 px,
 * so rounding or out-of-range fractions can never produce an empty canvas
 * or read outside the source.
 *
 * @param {HTMLCanvasElement} srcCanvas - Canvas to crop from.
 * @param {number} x - Left edge as a fraction of the source width.
 * @param {number} y - Top edge as a fraction of the source height.
 * @param {number} w - Width as a fraction of the source width.
 * @param {number} h - Height as a fraction of the source height.
 * @returns {HTMLCanvasElement} New canvas containing the cropped region.
 */
function cropRel(srcCanvas, x, y, w, h) {
  const sw = srcCanvas.width, sh = srcCanvas.height;
  const clamp = (v, lo, hi) => Math.min(hi, Math.max(lo, v));
  const cx = clamp(Math.round(x * sw), 0, Math.max(0, sw - 1));
  const cy = clamp(Math.round(y * sh), 0, Math.max(0, sh - 1));
  const cw = clamp(Math.round(w * sw), 1, Math.max(1, sw - cx));
  const ch = clamp(Math.round(h * sh), 1, Math.max(1, sh - cy));
  const out = document.createElement('canvas');
  out.width = cw; out.height = ch;
  const octx = out.getContext('2d');
  octx.drawImage(srcCanvas, cx, cy, cw, ch, 0, 0, cw, ch);
  return out;
}
/**
 * Binarises a canvas with a local-mean ("adaptive") threshold.
 *
 * Each pixel is converted to a luma value and compared with the mean luma
 * of a square window centred on it. Pixels darker than `mean - offset`
 * become black, all others white. Window means are computed in constant
 * time per pixel from a summed-area table.
 *
 * @param {HTMLCanvasElement} srcCanvas - Source canvas (read only).
 * @param {object} [options]
 * @param {number} [options.offset=7] - Amount subtracted from the local mean.
 * @param {number} [options.minWindow=15] - Minimum window half-size in px.
 * @param {number} [options.windowDivisor=24] - Window half-size is
 *   `min(width, height) / windowDivisor`, but never below `minWindow`.
 * @returns {HTMLCanvasElement} New canvas of the same size containing only
 *   opaque black and white pixels.
 */
function adaptiveThreshold(srcCanvas, options = {}) {
  const { offset = 7, minWindow = 15, windowDivisor = 24 } = options;
  const w = srcCanvas.width, h = srcCanvas.height;
  const out = document.createElement('canvas');
  out.width = w; out.height = h;
  // getImageData() throws on a zero-sized rectangle; nothing to binarise anyway.
  if (w <= 0 || h <= 0) return out;

  const sctx = srcCanvas.getContext('2d');
  const octx = out.getContext('2d');
  const d = sctx.getImageData(0, 0, w, h).data;

  // Luma per pixel (Rec. 709 weights), truncated to an integer.
  const gray = new Uint8ClampedArray(w * h);
  for (let i = 0, j = 0; i < d.length; i += 4, ++j) {
    gray[j] = (0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2]) | 0;
  }

  // Summed-area table with a zero border row/column. Uint32 is enough while
  // 255 * w * h fits into 32 bits; larger images fall back to doubles.
  const stride = w + 1;
  const SumArray = (w * h * 255 > 0xFFFFFFFF) ? Float64Array : Uint32Array;
  const S = new SumArray(stride * (h + 1));
  for (let y = 1; y <= h; y++) {
    let rowsum = 0;
    for (let x = 1; x <= w; x++) {
      rowsum += gray[(y - 1) * w + (x - 1)];
      S[y * stride + x] = S[(y - 1) * stride + x] + rowsum;
    }
  }

  const win = Math.max(minWindow, Math.round(Math.min(w, h) / windowDivisor));
  const outD = octx.createImageData(w, h);
  const od = outD.data;
  for (let y = 0; y < h; y++) {
    const y0 = Math.max(0, y - win), y1 = Math.min(h - 1, y + win);
    for (let x = 0; x < w; x++) {
      const x0 = Math.max(0, x - win), x1 = Math.min(w - 1, x + win);
      // Window sum from the four table corners.
      const A = S[y0 * stride + x0];
      const B = S[(y1 + 1) * stride + x0];
      const Cc = S[y0 * stride + (x1 + 1)];
      const Dd = S[(y1 + 1) * stride + (x1 + 1)];
      const area = (x1 - x0 + 1) * (y1 - y0 + 1);
      const mean = (Dd + A - B - Cc) / area;
      const pix = gray[y * w + x] < (mean - offset) ? 0 : 255;
      const k = (y * w + x) * 4;
      od[k] = od[k + 1] = od[k + 2] = pix;
      od[k + 3] = 255;
    }
  }
  octx.putImageData(outD, 0, 0);
  return out;
}
/**
 * Loads an image file and prepares the two canvases used for OCR.
 *
 * The temporary object URL is revoked as soon as loading has finished
 * (successfully or not), so repeated scans do not leak blob references.
 *
 * @param {Blob|File} file - Image selected by the user.
 * @returns {Promise<{base: HTMLCanvasElement, bin: HTMLCanvasElement}>}
 *   `base` is the scaled image from fixCanvasOrientation(); `bin` is the
 *   result of adaptiveThreshold() applied to `base`.
 * @throws {Error} When the browser cannot load the image.
 */
async function preprocessImage(file) {
  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      // Reject with a real Error rather than the raw error Event.
      o.onerror = () => rej(new Error('Image could not be loaded'));
      o.src = url;
    });
  } finally {
    URL.revokeObjectURL(url);
  }
  const base = fixCanvasOrientation(img, 2200);
  const bin = adaptiveThreshold(base);
  return { base, bin };
}
// ------------------------------------------------------------
// 4) OCR (Mehrzonen, Whitelists)
// ------------------------------------------------------------
/**
 * Runs OCR over three horizontal zones of the label image and returns the
 * concatenated text.
 *
 * Every zone is recognised twice: once on the binarised canvas and once on
 * the plain scaled canvas. The six results are joined with newlines in the
 * order header(bin), header(base), body(bin), body(base), footer(bin),
 * footer(base). Progress is reported through setProgress(), and
 * setProgress(null) is called when the function exits, also on failure.
 *
 * NOTE(review): whether Tesseract.recognize() honours `tessedit_*` keys
 * passed in its options object depends on the Tesseract.js version loaded
 * by ensureTesseract() — confirm against that version's API.
 *
 * @param {Blob|File} file - Image selected by the user.
 * @returns {Promise<string>} Raw OCR text of all passes.
 */
async function runOCR(file) {
  await ensureTesseract();
  setProgress(0);
  try {
    const { base, bin } = await preprocessImage(file);
    // crop = [x, y, width, height] as fractions of the image size.
    const zones = [
      { name:'header', crop:[0.00,0.00,1.00,0.28], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -&.,’\'' },
      { name:'body', crop:[0.00,0.28,1.00,0.52], psm:6, whitelist:null },
      { name:'footer', crop:[0.00,0.80,1.00,0.20], psm:6, whitelist:'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 %°.,-’\'' },
    ];
    const total = zones.length * 2;
    let step = 0;

    // One recognition pass; `step` counts finished passes for the progress bar.
    const recognize = async (canvas, zone) => {
      const opts = { tessedit_pageseg_mode: zone.psm, preserve_interword_spaces: 1 };
      if (zone.whitelist) opts.tessedit_char_whitelist = zone.whitelist;
      const out = await Tesseract.recognize(canvas, 'deu+eng', {
        logger: m => { if (m.status === 'recognizing text') setProgress((step + m.progress) / total); },
        ...opts,
      });
      step += 1;
      return out.data?.text || '';
    };

    const texts = [];
    for (const z of zones) {
      const cropBin = cropRel(bin, ...z.crop);
      const cropBase = cropRel(base, ...z.crop);
      texts.push(await recognize(cropBin, z));
      texts.push(await recognize(cropBase, z));
    }
    const full = texts.join('\n');

    // Optional on-page debug output; must never break the scan itself.
    try {
      if (window.ADOS_SCAN_DEBUG) {
        const box = document.getElementById('ados-scan-ocr');
        if (box) box.textContent = full;
      }
    } catch (e) {
      console.warn('[LabelScan] debug output failed', e);
    }
    return full;
  } finally {
    // Reset the progress display even when preprocessing or OCR throws.
    setProgress(null);
  }
}
// ------------------------------------------------------------
// 5) Hints extrahieren (mit Normalisierung & Fuzzy-Fixes)
// ------------------------------------------------------------
/**
 * Derives search hints from raw OCR text.
 *
 * The text is whitespace-collapsed, typographic quotes/dashes are mapped to
 * ASCII, `|` is read as `I`, and a few frequent OCR misreadings are
 * repaired. All hints are then extracted from that normalised string.
 *
 * @param {string} text - Raw OCR output (null/undefined is treated as '').
 * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
 *   names - entries of KNOWN_TOKENS found as whole words, plus series names;
 *   ages  - numbers followed by years/yo/Jahr(e), de-duplicated;
 *   years - four-digit numbers starting with 19 or 20, de-duplicated;
 *   words - up to 8 distinct capitalised words of at least 4 characters;
 *   raw   - the normalised text.
 */
function extractHints (text) {
  const raw = String(text || '').replace(/\s+/g, ' ').trim();

  // Aggressive normalisation of characters OCR commonly confuses.
  let norm = raw
    .replace(/[“”„‟]/g,'"')
    .replace(/[’‘´`]/g,"'")
    .replace(/[|]/g,'I')
    .replace(/[\u2010-\u2015]/g,'-')
    .replace(/\s+/g,' ')
    .trim();

  // Frequent misreadings. The Tasteful pattern also consumes an already
  // present leading "The"; otherwise "The Tasteful 8" would be rewritten to
  // "The The Tasteful 8".
  const fixes = [
    [/(?:\bTHE\s*)?T\s*A\s*S\s*T\s*E\s*F\s*U\s*L\s*8/i, 'The Tasteful 8'],
    [/HEROE?S?\s+OF\s+CHILDHOOD/i, 'Heroes of Childhood'],
    [/IR(E|I)LAND/i, 'Ireland'],
    [/O?LOROSO/i, 'Oloroso'],
    [/PX/i, 'PX'],
    [/1ST\s*FILL/i, '1st Fill'],
    [/\b([12][0-9])\s*(?:Y(?:EARS?)?|YO|JAHRE?)\b/ig, (m,p)=>`${p} Years`],
  ];
  for (const [re, rep] of fixes) norm = norm.replace(re, rep);

  // Known tokens that occur in the text (whole word, case-insensitive).
  const foundNames = [];
  KNOWN_TOKENS.forEach(t => {
    const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
    if (re.test(norm)) foundNames.push(t);
  });

  // Series names are not part of KNOWN_TOKENS and are matched separately.
  if (/The Tasteful 8/i.test(norm) && !foundNames.includes('The Tasteful 8')) foundNames.push('The Tasteful 8');
  if (/Heroes of Childhood/i.test(norm) && !foundNames.includes('Heroes of Childhood')) foundNames.push('Heroes of Childhood');
  if (/Ireland/i.test(norm) && !foundNames.includes('Ireland')) foundNames.push('Ireland');

  // Age statements.
  const ages = [];
  let m;
  const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
  while ((m = ageRe.exec(norm)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); }

  // Vintages.
  const years = [];
  const yearRe = /\b(19|20)\d{2}\b/g;
  while ((m = yearRe.exec(norm)) !== null) { if (!years.includes(m[0])) years.push(m[0]); }

  // A few distinctive capitalised words.
  const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
  const uniq = new Set(); let w; const words = [];
  while ((w = wordRe.exec(norm)) !== null) {
    const s = w[0];
    if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; }
  }

  return { names: foundNames, ages, years, words, raw: norm };
}
// ------------------------------------------------------------ | |||
// 6) Suche im Wiki (3 Pässe) | |||
// ------------------------------------------------------------ | |||
async function searchWikiSmart (hints, limit) { | async function searchWikiSmart (hints, limit) { | ||
await mw.loader.using( | await mw.loader.using('mediawiki.api'); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const ns0 = 0; | const ns0 = 0; | ||
const MAX = limit || 12; | const MAX = limit || 12; | ||
function incatStr () { | |||
return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' '); | |||
} | |||
const pass1 = []; | const pass1 = []; | ||
if (hints.names.length) { | if (hints.names.length) { | ||
| Zeile 302: | Zeile 296: | ||
} | } | ||
const key = [] | const key = [] | ||
.concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3)) | .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3)) | ||
| Zeile 308: | Zeile 301: | ||
const pass2 = key ? [ `${key} ${incatStr()}` ] : []; | const pass2 = key ? [ `${key} ${incatStr()}` ] : []; | ||
const pass3 = []; | const pass3 = []; | ||
if (hints.names.length) pass3.push(hints.names[0]); | if (hints.names.length) pass3.push(hints.names[0]); | ||
| Zeile 328: | Zeile 320: | ||
for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); } | ||
for (const p of pass3) { | for (const p of pass3) { | ||
const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX }); | ||
| Zeile 344: | Zeile 335: | ||
} | } | ||
// | // ------------------------------------------------------------ | ||
// 7) Treffer rendern | |||
// ------------------------------------------------------------ | |||
// | |||
function renderResults (items) { | function renderResults (items) { | ||
var box = document.getElementById('ados-scan-results'); | var box = document.getElementById('ados-scan-results'); | ||
| Zeile 413: | Zeile 359: | ||
} | } | ||
// ------------------------------------------------------------
// 8) Bindings (Buttons, Dropzone, Fallbacks)
// ------------------------------------------------------------
var BOUND = false; | var BOUND = false; | ||
function bind () { | function bind () { | ||
if (BOUND || !hasUI()) return; | if (BOUND || !hasUI()) return; | ||
var runBtn = document.getElementById('ados-scan-run'); | var runBtn = document.getElementById('ados-scan-run'); | ||
var fileIn = document.getElementById('ados-scan-file'); | var fileIn = document.getElementById('ados-scan-file'); | ||
var bigBtn = document.getElementById('ados-scan-bigbtn'); | var bigBtn = document.getElementById('ados-scan-bigbtn'); | ||
var | var drop = document.getElementById('ados-scan-drop'); | ||
if (!runBtn || !fileIn) return; | if (!runBtn || !fileIn) return; | ||
| Zeile 435: | Zeile 379: | ||
}); | }); | ||
function | // Drag&Drop | ||
if (drop) { | |||
['dragenter','dragover'].forEach(ev => | |||
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.add('is-over'); })); | |||
['dragleave','drop'].forEach(ev => | |||
drop.addEventListener(ev, e => { e.preventDefault(); drop.classList.remove('is-over'); })); | |||
drop.addEventListener('drop', e => { | |||
const f = e.dataTransfer?.files?.[0]; | |||
if (f) { fileIn.files = e.dataTransfer.files; showPreview(f); } | |||
}); | |||
} | |||
runBtn.addEventListener('click', async function (ev) { | |||
ev.preventDefault(); | ev.preventDefault(); | ||
if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; } | ||
var f = fileIn.files[0]; | var f = fileIn.files[0]; | ||
try { | |||
runBtn.disabled = true; runBtn.textContent = 'Erkenne …'; | |||
setStatus('Erkenne Label …'); | |||
var text = await runOCR(f); | |||
if (window.ADOS_SCAN_DEBUG) { | |||
const dbg = document.getElementById('ados-scan-ocr'); | |||
if (dbg) dbg.textContent = text; | |||
} | } | ||
})(); | setStatus('Suche im Wiki …'); | ||
} | var hints = extractHints(text); | ||
var hits = await searchWikiSmart(hints, 12); | |||
renderResults(hits); | |||
setStatus('Fertig.'); | |||
} catch (e) { | |||
console.error('[LabelScan]', e); | |||
setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | |||
} finally { | |||
runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen'; | |||
} | |||
}); | |||
// Sicherheit gegen Overlays | |||
// Sicherheit | |||
var wrap = document.getElementById('ados-labelscan'); | var wrap = document.getElementById('ados-labelscan'); | ||
if (wrap) wrap.style.position = 'relative'; | if (wrap) wrap.style.position = 'relative'; | ||
| Zeile 481: | Zeile 424: | ||
} | } | ||
// initial & Fallback-Bindings | |||
if (document.readyState === 'loading') { | if (document.readyState === 'loading') { | ||
document.addEventListener('DOMContentLoaded', bind); | document.addEventListener('DOMContentLoaded', bind); | ||
| Zeile 490: | Zeile 434: | ||
var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | var mo = new MutationObserver(function () { if (!BOUND) bind(); }); | ||
mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | mo.observe(document.documentElement || document.body, { childList: true, subtree: true }); | ||
})(); | })(); | ||