MediaWiki:Gadget-LabelScan.js
Erscheinungsbild
Hinweis: Leere nach dem Veröffentlichen den Browser-Cache, um die Änderungen sehen zu können.
- Firefox/Safari: Umschalttaste drücken und gleichzeitig Aktualisieren anklicken oder entweder Strg+F5 oder Strg+R (⌘+R auf dem Mac) drücken
- Google Chrome: Umschalttaste+Strg+R (⌘+Umschalttaste+R auf dem Mac) drücken
- Edge: Strg+F5 drücken oder Strg drücken und gleichzeitig Aktualisieren anklicken
/* global mw */
(function () {
'use strict';
// ---------- Hilfsfunktionen für UI ----------
/**
 * Looks up a DOM element by its id.
 *
 * @param {string} id - Id of the element to fetch.
 * @returns {HTMLElement|null} Whatever `document.getElementById` yields for `id`.
 */
function $(id) {
  const element = document.getElementById(id);
  return element;
}
/**
 * Writes a status message into the `ados-scan-status` element.
 * Does nothing when that element is not on the page.
 *
 * @param {string} [t] - Message to show; any falsy value clears the text.
 */
function status(t) {
  const target = $('ados-scan-status');
  if (!target) {
    return;
  }
  target.textContent = t || '';
}
/**
 * Updates the `ados-scan-progress` element.
 * Does nothing when that element is not on the page.
 *
 * @param {number|null} [v] - Progress fraction; it is clamped to the range 0..1.
 *   `null` or `undefined` hides the bar and resets its value to 0.
 */
function progress(v) {
  const bar = $('ados-scan-progress');
  if (!bar) {
    return;
  }
  const shouldHide = v == null; // loose check on purpose: matches null and undefined
  bar.hidden = shouldHide;
  bar.value = shouldHide ? 0 : Math.max(0, Math.min(1, v));
}
/**
 * Shows a preview of the selected photo inside the `ados-scan-preview` element,
 * replacing whatever was shown there before.
 * Does nothing when that element is not on the page.
 *
 * @param {Blob} file - Image file or blob to preview (passed to URL.createObjectURL).
 */
function preview(file) {
  const container = $('ados-scan-preview');
  if (!container) {
    return;
  }
  const url = URL.createObjectURL(file);
  const img = document.createElement('img');
  img.alt = 'Vorschau';
  img.style.cssText = 'max-width:100%;height:auto;border-radius:8px';
  // Release the blob URL once the browser has loaded (or failed to load) the
  // image; otherwise every newly selected photo keeps its blob referenced.
  const release = () => URL.revokeObjectURL(url);
  img.onload = release;
  img.onerror = release;
  img.src = url;
  container.textContent = '';
  container.appendChild(img);
}
// ---------- Tesseract sauber laden (Worker/WASM/CDN) ----------
// Cached promise of the Tesseract.js script load. `null` means no load has been
// started yet, or the last attempt failed and may be retried.
let _tessReady = null;
const TESS_CDN = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/';
/**
 * Makes sure the Tesseract.js main script is available as `window.Tesseract`.
 * The script tag is injected at most once per successful load; concurrent
 * callers share the same pending promise.
 *
 * @returns {Promise<void>} Resolves once the script has loaded (or was already present).
 * @throws {Error} Rejects when the script cannot be loaded. The cache is cleared
 *   in that case so that a later call starts a fresh attempt.
 */
async function ensureTesseract() {
  if (_tessReady) {
    return _tessReady;
  }
  _tessReady = new Promise((resolve, reject) => {
    if (window.Tesseract) {
      resolve();
      return;
    }
    const script = document.createElement('script');
    script.src = TESS_CDN + 'tesseract.min.js';
    script.async = true;
    script.onload = () => resolve();
    script.onerror = () => {
      // Do not keep the rejected promise cached: the UI asks the user to try
      // again, and that retry must be able to re-request the script.
      _tessReady = null;
      script.remove();
      reject(new Error('Tesseract.js konnte nicht geladen werden'));
    };
    document.head.appendChild(script);
  });
  return _tessReady;
}
/**
 * Light canvas preprocessing for OCR: decodes the image, rescales it so that
 * its longer side is 1800 px (smaller images are scaled up, larger ones down),
 * converts it to grayscale and applies a mild contrast boost.
 *
 * @param {Blob} file - Image file or blob to preprocess.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image.
 * @throws {Error} Rejects when the image cannot be loaded.
 */
async function preprocess(file) {
  const url = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((resolve, reject) => {
      const image = new Image();
      image.onload = () => resolve(image);
      // Reject with a real Error (not the raw DOM event) so callers get a message and stack.
      image.onerror = () => reject(new Error('Bild konnte nicht geladen werden'));
      image.src = url;
    });
  } finally {
    // The image is loaded (or has failed) at this point; release the blob URL
    // so repeated scans do not accumulate blob references.
    URL.revokeObjectURL(url);
  }

  const MAX = 1800;
  const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
  const w = Math.round(img.width * scale);
  const h = Math.round(img.height * scale);

  const canvas = document.createElement('canvas');
  canvas.width = w;
  canvas.height = h;
  const ctx = canvas.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);

  const imageData = ctx.getImageData(0, 0, w, h);
  const px = imageData.data;
  // Pixels are laid out as RGBA quadruples; alpha (index i + 3) is left untouched.
  for (let i = 0; i < px.length; i += 4) {
    const gray = 0.2126 * px[i] + 0.7152 * px[i + 1] + 0.0722 * px[i + 2];
    // Mild contrast boost around mid-gray, clamped to the 0..255 range.
    const value = Math.max(0, Math.min(255, (gray - 128) * 1.12 + 128));
    px[i] = value;
    px[i + 1] = value;
    px[i + 2] = value;
  }
  ctx.putImageData(imageData, 0, 0);
  return canvas;
}
// ---------- OCR ----------
/**
 * Runs OCR (English + German) on an image and returns the recognized text.
 *
 * Loads Tesseract.js on demand, creates a worker, preprocesses the image,
 * recognizes it, and always terminates the worker and hides the progress bar
 * afterwards, whether recognition succeeded or failed.
 *
 * @param {Blob} file - Image file or blob to recognize.
 * @returns {Promise<string>} Trimmed recognized text; empty string when nothing was recognized.
 */
async function runOCR(file) {
  await ensureTesseract();
  const { createWorker } = window.Tesseract;
  // The script is pinned to tesseract.js@5, whose signature is
  // createWorker(langs, oem, options). Languages and engine mode are given
  // here; the v4-style loadLanguage()/initialize() calls are deprecated no-ops
  // in v5. OEM 1 = LSTM only.
  // Only workerPath is pinned, to the same CDN build as the main script.
  // langPath and corePath are left to the library defaults.
  // NOTE(review): the former hard-coded paths ('langs/' and
  // 'tesseract-core.wasm.js' under dist/) do not appear to exist in the
  // tesseract.js package — confirm before re-adding explicit paths.
  const worker = await createWorker('eng+deu', 1, {
    workerPath: TESS_CDN + 'worker.min.js',
    logger: (m) => {
      if (m && typeof m.progress === 'number') {
        progress(m.progress);
      }
    },
  });
  try {
    const canvas = await preprocess(file);
    // Page segmentation mode 6 = assume a single uniform block of text.
    await worker.setParameters({
      tessedit_pageseg_mode: '6',
      preserve_interword_spaces: '1',
    });
    const result = await worker.recognize(canvas);
    const text = (result && result.data && result.data.text) ? result.data.text : '';
    return text.trim();
  } finally {
    await worker.terminate();
    progress(null);
  }
}
// ---------- einfache Treffer-Suche im Wiki ----------
/**
 * Searches the wiki's main namespace for pages matching the OCR text.
 *
 * Heuristic: characters other than letters, digits, whitespace, hyphen and
 * apostrophe are replaced by spaces; the first six remaining words of at
 * least three characters are each quoted and combined into one query.
 *
 * @param {string} q - Raw OCR text.
 * @returns {Promise<Array<Object>>} The `query.search` array of the API response,
 *   or an empty array when there are no usable words or no results.
 */
async function searchTitles(q) {
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();

  const cleaned = String(q || '')
    .replace(/[^\p{L}\p{N}\s\-']/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  // Collect at most six words that are long enough to be meaningful.
  const words = [];
  for (const token of cleaned.split(' ')) {
    if (token.length < 3) {
      continue;
    }
    words.push(token);
    if (words.length === 6) {
      break;
    }
  }
  if (words.length === 0) {
    return [];
  }

  // Space-separated quoted terms: every word has to match.
  const quotedTerms = words.map((word) => `"${word}"`).join(' ');
  const response = await api.get({
    action: 'query',
    list: 'search',
    srsearch: quotedTerms,
    srlimit: 12,
    srnamespace: 0,
    formatversion: 2
  });

  const hits = response.query && response.query.search;
  return hits ? hits : [];
}
/**
 * Renders search hits into the `ados-scan-results` element, replacing its
 * previous content. Shows a fallback message when there are no hits.
 * Does nothing when that element is not on the page.
 *
 * @param {Array<{title: string, snippet: string}>} items - Search hits; each
 *   hit's `title` becomes a link and its optional `snippet` a sub-line.
 */
function renderResults(items) {
  const box = $('ados-scan-results');
  if (!box) {
    return;
  }
  box.innerHTML = '';
  if (!items || !items.length) {
    box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
    return;
  }
  for (const item of items) {
    const title = item.title || '';
    const link = mw.util.getUrl(title.replace(/ /g, '_'));
    // Drop the highlight <span> wrappers around matched terms.
    const snip = String(item.snippet || '').replace(/<\/?span[^>]*>/g, '');

    const hit = document.createElement('div');
    hit.className = 'ados-hit';

    // Build the link with DOM properties instead of concatenating HTML, so
    // neither the title nor the URL can break out of the markup.
    const anchor = document.createElement('a');
    anchor.href = link;
    anchor.textContent = title;
    const bold = document.createElement('b');
    bold.appendChild(anchor);
    hit.appendChild(bold);

    if (snip) {
      const meta = document.createElement('div');
      meta.className = 'meta';
      // NOTE(review): inserted as HTML so that entities in the snippet are
      // decoded; this assumes the search API returns already-escaped text —
      // confirm against the API output.
      meta.innerHTML = snip;
      hit.appendChild(meta);
    }
    box.appendChild(hit);
  }
}
// ---------- Bindings ----------
/**
 * Wires up the scan UI. Requires the `ados-scan-run` button and the
 * `ados-scan-file` input; returns without binding anything if either is
 * missing. The `ados-scan-bigbtn` element is optional.
 *
 * - Clicking the big button opens the file picker.
 * - Selecting a file shows a preview.
 * - Clicking the run button performs OCR, writes the raw text to
 *   `ados-scan-ocr` (when present), searches the wiki and renders the hits.
 */
function bind() {
  const runButton = $('ados-scan-run');
  const fileInput = $('ados-scan-file');
  const bigButton = $('ados-scan-bigbtn');
  if (!(runButton && fileInput)) {
    return;
  }

  // Currently selected file, or a falsy value when nothing is selected.
  const pickedFile = () => fileInput.files && fileInput.files[0];

  const onRunClick = async (ev) => {
    ev.preventDefault();
    const selected = pickedFile();
    if (!selected) {
      alert('Bitte zuerst ein Foto auswählen oder aufnehmen.');
      return;
    }

    // Lock the button and remember its label for the duration of the scan.
    runButton.disabled = true;
    const previousLabel = runButton.textContent;
    runButton.textContent = 'Erkenne …';
    status('Erkenne Label …');
    progress(0);

    try {
      // Step 1: OCR
      const recognized = await runOCR(selected);
      // Raw OCR text, shown for debugging when the element exists.
      const debugOut = $('ados-scan-ocr');
      if (debugOut) {
        debugOut.textContent = recognized;
      }
      // Step 2: wiki search
      status('Suche im Wiki …');
      const found = await searchTitles(recognized);
      renderResults(found);
      status('Fertig.');
    } catch (err) {
      console.error('[LabelScan] Fehler:', err);
      status('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      progress(null);
      runButton.disabled = false;
      runButton.textContent = previousLabel;
    }
  };

  if (bigButton) {
    bigButton.addEventListener('click', () => fileInput.click());
  }
  fileInput.addEventListener('change', () => {
    const selected = pickedFile();
    if (selected) {
      preview(selected);
    }
  });
  runButton.addEventListener('click', onRunClick);

  console.log('[LabelScan] Gadget gebunden.');
}
// Bind right away when the document is already parsed; otherwise wait for
// DOMContentLoaded.
if (document.readyState !== 'loading') {
  bind();
} else {
  document.addEventListener('DOMContentLoaded', bind);
}
})();