MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 1: | Zeile 1: | ||
/* global mw | /* global mw */ | ||
(function () { | (function () { | ||
'use strict'; | 'use strict'; | ||
// | // ---------- Hilfsfunktionen für UI ---------- | ||
function $(id){ return document.getElementById(id); } | |||
function status(t){ const el=$('ados-scan-status'); if(el) el.textContent = t || ''; } | |||
function progress(v){ | |||
const bar=$('ados-scan-progress'); | |||
if(!bar) return; | |||
if(v == null){ bar.hidden = true; bar.value = 0; } | |||
else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, v)); } | |||
function | |||
function | |||
} | } | ||
function | function preview(file){ | ||
const p = $('ados-scan-preview'); | |||
const | if(!p) return; | ||
if (p | |||
const url = URL.createObjectURL(file); | const url = URL.createObjectURL(file); | ||
p.innerHTML = '<img alt="Vorschau" style="max-width:100%;height:auto;border-radius:8px" />'; | |||
p.querySelector('img').src = url; | |||
} | } | ||
// | // ---------- Tesseract sauber laden (Worker/WASM/CDN) ---------- | ||
let _tessReady = null; | |||
const TESS_CDN = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/'; | |||
let | async function ensureTesseract(){ | ||
if(_tessReady) return _tessReady; | |||
_tessReady = new Promise((resolve, reject) => { | |||
if(window.Tesseract) return resolve(); | |||
const s = document.createElement('script'); | |||
s.src = TESS_CDN + 'tesseract.min.js'; | |||
s.async = true; | |||
s.onload = () => resolve(); | |||
s.onerror = () => reject(new Error('Tesseract.js konnte nicht geladen werden')); | |||
document.head.appendChild(s); | |||
}); | |||
return _tessReady; | |||
} | |||
return | |||
} | } | ||
// | // Kleines Canvas-Preprocessing (Grayscale & leichter Kontrast) | ||
async function preprocess(file){ | |||
async function | |||
const img = await new Promise((res, rej) => { | const img = await new Promise((res, rej) => { | ||
const | const i = new Image(); | ||
i.onload = () => res(i); | |||
i.onerror = rej; | |||
i.src = URL.createObjectURL(file); | |||
}); | }); | ||
const MAX = 1800; | |||
const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height); | |||
const w = Math.round(img.width * scale); | |||
const h = Math.round(img.height * scale); | |||
const c = document.createElement('canvas'); | |||
c.width = w; c.height = h; | |||
const ctx = c.getContext('2d'); | |||
ctx.imageSmoothingEnabled = true; | |||
ctx.drawImage(img, 0, 0, w, h); | |||
const | const id = ctx.getImageData(0,0,w,h), d=id.data; | ||
for(let i=0;i<d.length;i+=4){ | |||
const g = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2]; | |||
const v = Math.max(0, Math.min(255, (g-128)*1.12 + 128)); // leichte Kontrastanhebung | |||
d[i]=d[i+1]=d[i+2]=v; | |||
for (let i=0;i< | |||
const | |||
const | |||
} | } | ||
ctx.putImageData(id,0,0); | |||
return c; | |||
return | |||
} | } | ||
// | // ---------- OCR ---------- | ||
async function runOCR(file){ | |||
await ensureTesseract(); | |||
function | const { createWorker } = window.Tesseract; | ||
const | |||
const | // Pfade hart setzen, damit Worker/WASM sicher gefunden werden | ||
const worker = await createWorker({ | |||
workerPath: TESS_CDN + 'worker.min.js', | |||
langPath: TESS_CDN + 'langs/', | |||
corePath: TESS_CDN + 'tesseract-core.wasm.js', | |||
logger: m => { | |||
if(m && typeof m.progress === 'number'){ | |||
progress(m.progress); | |||
} | |||
if(m && m.status) { | |||
// optionales Debug | |||
// console.log('[OCR]', m.status, m.progress ?? ''); | |||
} | |||
} | |||
}); | }); | ||
try { | |||
await worker.loadLanguage('eng+deu'); | |||
await worker.initialize('eng+deu'); | |||
const canvas = await preprocess(file); | |||
// psm 6 = Block Text; oem 1 = LSTM | |||
await worker.setParameters({ | |||
tessedit_pageseg_mode: '6', | |||
preserve_interword_spaces: '1' | |||
}); | |||
const result = await worker.recognize(canvas); | |||
const text = (result && result.data && result.data.text) ? result.data.text : ''; | |||
return text.trim(); | |||
} finally { | |||
await worker.terminate(); | |||
progress(null); | |||
} | |||
} | } | ||
// | // ---------- einfache Treffer-Suche im Wiki ---------- | ||
async function searchTitles(q){ | |||
async function | |||
await mw.loader.using('mediawiki.api'); | await mw.loader.using('mediawiki.api'); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
// sehr einfache Heuristik: nimm 3–5 „gute“ Wörter aus dem OCR | |||
const words = String(q || '') | |||
.replace(/[^\p{L}\p{N}\s\-']/gu,' ') | |||
.replace(/\s+/g,' ') | |||
.trim() | |||
.split(' ') | |||
.filter(w => w.length >= 3) | |||
.slice(0, 6); | |||
if(!words.length) return []; | |||
if ( | |||
const | const sr = await api.get({ | ||
action: 'query', | |||
list: 'search', | |||
srsearch: words.map(w => `"${w}"`).join(' '), // UND-verkettet | |||
srlimit: 12, | |||
srnamespace: 0, | |||
formatversion: 2 | |||
}); | |||
return (sr.query && sr.query.search) ? sr.query.search : []; | |||
} | } | ||
function renderResults(items){ | function renderResults(items){ | ||
const box = | const box = $('ados-scan-results'); | ||
box.innerHTML=''; | if(!box) return; | ||
if (!items || !items.length){ | box.innerHTML = ''; | ||
box.innerHTML='<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>'; | if(!items || !items.length){ | ||
box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>'; | |||
return; | return; | ||
} | } | ||
items | items.forEach(it => { | ||
const title = it.title || ''; | const title = it.title || ''; | ||
const link = mw.util.getUrl(title.replace(/ /g,'_')); | const link = mw.util.getUrl(title.replace(/ /g,'_')); | ||
const snip = String(it.snippet||'').replace(/<\/?span[^>]*>/g,'').replace(/"/g,'"'); | const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g,'').replace(/"/g,'"'); | ||
const div=document.createElement('div'); div.className='ados-hit'; | const div = document.createElement('div'); | ||
div.innerHTML = '<b><a href="'+link+'">'+ | div.className = 'ados-hit'; | ||
div.innerHTML = '<b><a href="'+link+'">'+mw.html.escape(title)+'</a></b>' + (snip ? '<div class="meta">'+snip+'</div>' : ''); | |||
box.appendChild(div); | box.appendChild(div); | ||
}); | }); | ||
} | } | ||
// | // ---------- Bindings ---------- | ||
function bind(){ | function bind(){ | ||
const run = $('ados-scan-run'); | |||
const | const file= $('ados-scan-file'); | ||
const | const big = $('ados-scan-bigbtn'); | ||
const | |||
if(!run || !file) return; | |||
if ( | if(big){ big.addEventListener('click', () => file.click()); } | ||
if ( | file.addEventListener('change', () => { if(file.files && file.files[0]) preview(file.files[0]); }); | ||
run.addEventListener('click', async (ev) => { | |||
ev.preventDefault(); | |||
if(!(file.files && file.files[0])){ | |||
alert('Bitte zuerst ein Foto auswählen oder aufnehmen.'); | |||
return; | |||
} | |||
const f = file.files[0]; | |||
run.disabled = true; | |||
const old = run.textContent; | |||
run.textContent = 'Erkenne …'; | |||
status('Erkenne Label …'); | |||
progress(0); | |||
try { | |||
// 1) OCR | |||
const text = await runOCR(f); | |||
// Debug-Ausgabe für dich: | |||
const dbg = $('ados-scan-ocr'); | |||
if(dbg){ dbg.textContent = text; } | |||
if ( | |||
// 2) Suche | |||
status('Suche im Wiki …'); | |||
const hits = await searchTitles(text); | |||
const hits | |||
renderResults(hits); | renderResults(hits); | ||
status('Fertig.'); | |||
} catch( | } catch(err){ | ||
console.error('[LabelScan]', | console.error('[LabelScan] Fehler:', err); | ||
status('Fehler bei Erkennung/Suche. Bitte erneut versuchen.'); | |||
} finally { | } finally { | ||
progress(null); | |||
run.disabled = false; | |||
run.textContent = old; | |||
} | } | ||
}); | }); | ||
console.log('[LabelScan] Gadget gebunden.'); | |||
} | } | ||
if(document.readyState === 'loading'){ | |||
if (document.readyState === 'loading'){ document.addEventListener('DOMContentLoaded', bind); } else { | document.addEventListener('DOMContentLoaded', bind); | ||
} else { | |||
bind(); | |||
} | |||
})(); | })(); | ||
Version vom 6. November 2025, 20:26 Uhr
/* global mw */
(function () {
'use strict';
// ---------- UI helper functions ----------
/**
 * Looks up a DOM element by its id.
 * @param {string} id - Element id to search for.
 * @returns {HTMLElement|null} Whatever document.getElementById yields for that id.
 */
function $(id){
  const found = document.getElementById(id);
  return found;
}
/**
 * Writes a status message into the element with id "ados-scan-status".
 * Does nothing when that element is not on the page.
 * @param {string} [t] - Message to show; any falsy value clears the text.
 */
function status(t){
  const target = $('ados-scan-status');
  if(!target){
    return;
  }
  target.textContent = t ? t : '';
}
/**
 * Updates the element with id "ados-scan-progress".
 * Does nothing when that element is not on the page.
 * @param {?number} v - Fraction done; clamped to [0, 1]. null/undefined hides
 *   the bar and resets its value to 0.
 */
function progress(v){
  const meter = $('ados-scan-progress');
  if(!meter){
    return;
  }
  const finished = (v == null);
  meter.hidden = finished;
  meter.value = finished ? 0 : Math.max(0, Math.min(1, v));
}
/**
 * Shows a thumbnail of the chosen photo inside the element with id
 * "ados-scan-preview". Does nothing when that element is not on the page.
 * @param {Blob} file - The selected image file.
 */
function preview(file){
  const p = $('ados-scan-preview');
  if(!p) return;
  const url = URL.createObjectURL(file);
  p.innerHTML = '<img alt="Vorschau" style="max-width:100%;height:auto;border-radius:8px" />';
  const img = p.querySelector('img');
  // An object URL stays registered until it is revoked, so every newly picked
  // photo would otherwise pile up. Release it as soon as the image has
  // finished loading (or failed to load).
  const release = () => URL.revokeObjectURL(url);
  img.onload = release;
  img.onerror = release;
  img.src = url;
}
// ---------- Load Tesseract.js from the CDN ----------
// Memoised promise of the script load; null means "not started or last attempt failed".
let _tessReady = null;
const TESS_CDN = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/';
/**
 * Makes sure the Tesseract.js main script is available as window.Tesseract.
 * Concurrent and repeated calls share one script load.
 * @returns {Promise<void>} Resolves once the script has loaded (or was already
 *   present); rejects with an Error when the script cannot be loaded.
 */
async function ensureTesseract(){
  if(_tessReady) return _tessReady;
  _tessReady = new Promise((resolve, reject) => {
    if(window.Tesseract) return resolve();
    const s = document.createElement('script');
    s.src = TESS_CDN + 'tesseract.min.js';
    s.async = true;
    s.onload = () => resolve();
    s.onerror = () => {
      // Forget the failed attempt and drop the dead <script> element so that
      // the next call starts a fresh download instead of returning the same
      // rejected promise forever.
      _tessReady = null;
      s.remove();
      reject(new Error('Tesseract.js konnte nicht geladen werden'));
    };
    document.head.appendChild(s);
  });
  return _tessReady;
}
// Light canvas preprocessing (grayscale and a mild contrast boost)
/**
 * Decodes the image file, scales it so that its longer side is MAX pixels
 * (smaller images are enlarged as well), converts it to grayscale and applies
 * a slight contrast increase.
 * @param {Blob} file - The image file to prepare for OCR.
 * @returns {Promise<HTMLCanvasElement>} Canvas holding the processed image.
 *   The promise rejects when the image cannot be loaded.
 */
async function preprocess(file){
  const blobUrl = URL.createObjectURL(file);
  let img;
  try {
    img = await new Promise((res, rej) => {
      const i = new Image();
      i.onload = () => res(i);
      i.onerror = rej;
      i.src = blobUrl;
    });
  } finally {
    // The object URL is only needed while the image is being loaded; revoke it
    // on success and on failure so that repeated scans do not leak it.
    URL.revokeObjectURL(blobUrl);
  }
  const MAX = 1800;
  const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
  const w = Math.round(img.width * scale);
  const h = Math.round(img.height * scale);
  const c = document.createElement('canvas');
  c.width = w; c.height = h;
  const ctx = c.getContext('2d');
  ctx.imageSmoothingEnabled = true;
  ctx.drawImage(img, 0, 0, w, h);
  const id = ctx.getImageData(0,0,w,h), d=id.data;
  // Pixels are stored as R, G, B, A quadruples; alpha is left untouched.
  for(let i=0;i<d.length;i+=4){
    // Weighted sum of the colour channels (Rec. 709 luma coefficients).
    const g = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
    // Stretch values away from mid-gray by 12 % and clamp to the byte range.
    const v = Math.max(0, Math.min(255, (g-128)*1.12 + 128));
    d[i]=d[i+1]=d[i+2]=v;
  }
  ctx.putImageData(id,0,0);
  return c;
}
// ---------- OCR ----------
/**
 * Runs text recognition (English + German) on an image file.
 * @param {Blob} file - The photo to recognise.
 * @returns {Promise<string>} The recognised text, trimmed; '' when the result
 *   carries no text. Rejects when loading, preprocessing or recognition fails.
 */
async function runOCR(file){
  await ensureTesseract();
  const { createWorker } = window.Tesseract;
  // The script is loaded from tesseract.js@5, whose signature is
  // createWorker(langs, oem, options): the worker comes back with the
  // languages loaded and initialised, so the v2–v4 style calls
  // (options object as first argument, loadLanguage(), initialize()) are gone.
  // oem 1 = LSTM.
  // langPath/corePath are left at the library defaults.
  // NOTE(review): the previous values pointed into tesseract.js/dist/, which
  // to my knowledge ships only the main script and the worker — confirm
  // against the pinned CDN version.
  const worker = await createWorker(['eng', 'deu'], 1, {
    workerPath: TESS_CDN + 'worker.min.js',
    logger: m => {
      if(m && typeof m.progress === 'number'){
        progress(m.progress);
      }
    }
  });
  try {
    const canvas = await preprocess(file);
    // psm 6 = a single uniform block of text
    await worker.setParameters({
      tessedit_pageseg_mode: '6',
      preserve_interword_spaces: '1'
    });
    const result = await worker.recognize(canvas);
    const text = (result && result.data && result.data.text) ? result.data.text : '';
    return text.trim();
  } finally {
    try {
      await worker.terminate();
    } finally {
      // Hide the progress bar even if shutting the worker down fails.
      progress(null);
    }
  }
}
// ---------- Simple hit search in the wiki ----------
/**
 * Searches the main namespace for pages containing words from the OCR text.
 * @param {string} q - Raw OCR text.
 * @returns {Promise<Array<Object>>} The entries of query.search from the API
 *   response, or [] when the text yields no usable word or the response has
 *   no search list.
 */
async function searchTitles(q){
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  // Very simple heuristic: take the first (at most six) usable words of the OCR text.
  const words = String(q || '')
    .replace(/[^\p{L}\p{N}\s\-']/gu,' ')
    .replace(/\s+/g,' ')
    .trim()
    .split(' ')
    // Hyphens and apostrophes are only meaningful inside a word. Stripping
    // them at the edges also empties punctuation-only tokens such as "---",
    // which would otherwise become mandatory search phrases.
    .map(w => w.replace(/^[-']+|[-']+$/g, ''))
    .filter(w => w.length >= 3)
    .slice(0, 6);
  if(!words.length) return [];
  const sr = await api.get({
    action: 'query',
    list: 'search',
    srsearch: words.map(w => `"${w}"`).join(' '), // every quoted word must match (AND)
    srlimit: 12,
    srnamespace: 0,
    formatversion: 2
  });
  return (sr.query && sr.query.search) ? sr.query.search : [];
}
/**
 * Reduces an HTML snippet to its plain text.
 * The snippet is parsed into a separate document created by DOMParser and
 * only the resulting text is read, so no markup from the snippet is ever
 * inserted into the page.
 * @param {string} [html] - Snippet HTML as delivered by the search API.
 * @returns {string} Text content of the snippet; '' for empty input.
 */
function snippetToText(html){
  if(!html) return '';
  const doc = new DOMParser().parseFromString(String(html), 'text/html');
  return (doc.body && doc.body.textContent) || '';
}
/**
 * Renders the search hits into the element with id "ados-scan-results".
 * Does nothing when that element is not on the page.
 * Uses mw.util.getUrl — assumes the "mediawiki.util" module is already loaded
 * (e.g. through the gadget's dependencies); TODO confirm.
 * @param {Array<Object>} [items] - Search hits; each hit's `title` and
 *   `snippet` are read.
 */
function renderResults(items){
  const box = $('ados-scan-results');
  if(!box) return;
  box.innerHTML = '';
  if(!items || !items.length){
    box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
    return;
  }
  items.forEach(it => {
    const title = it.title || '';
    const div = document.createElement('div');
    div.className = 'ados-hit';
    // Build the link from nodes instead of concatenated HTML so that neither
    // the title nor the URL can break out of the markup.
    const a = document.createElement('a');
    a.href = mw.util.getUrl(title.replace(/ /g,'_'));
    a.textContent = title;
    const b = document.createElement('b');
    b.appendChild(a);
    div.appendChild(b);
    const snip = snippetToText(it.snippet);
    if(snip){
      const meta = document.createElement('div');
      meta.className = 'meta';
      meta.textContent = snip;
      div.appendChild(meta);
    }
    box.appendChild(div);
  });
}
// ---------- Event wiring ----------
/**
 * Connects the scan UI: the optional big button opens the file picker, picking
 * a file shows its preview, and the run button performs OCR followed by the
 * wiki search. Returns without binding anything when the run button or the
 * file input is missing.
 */
function bind(){
  const runBtn = $('ados-scan-run');
  const picker = $('ados-scan-file');
  const bigBtn = $('ados-scan-bigbtn');
  if(!runBtn || !picker){
    return;
  }

  // First selected file, or null when nothing is selected.
  const chosenFile = () => ((picker.files && picker.files[0]) ? picker.files[0] : null);

  /** Click handler of the run button: OCR, then search, then render. */
  async function onRunClick(ev){
    ev.preventDefault();
    const photo = chosenFile();
    if(!photo){
      alert('Bitte zuerst ein Foto auswählen oder aufnehmen.');
      return;
    }
    // Lock the button while working and remember its label for later.
    runBtn.disabled = true;
    const savedLabel = runBtn.textContent;
    runBtn.textContent = 'Erkenne …';
    status('Erkenne Label …');
    progress(0);
    try {
      // Step 1: recognise the text
      const recognised = await runOCR(photo);
      // Optional debug output of the raw OCR text
      const debugBox = $('ados-scan-ocr');
      if(debugBox){
        debugBox.textContent = recognised;
      }
      // Step 2: search the wiki
      status('Suche im Wiki …');
      const found = await searchTitles(recognised);
      renderResults(found);
      status('Fertig.');
    } catch(failure){
      console.error('[LabelScan] Fehler:', failure);
      status('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
    } finally {
      // Always restore the UI, whether the run succeeded or not.
      progress(null);
      runBtn.disabled = false;
      runBtn.textContent = savedLabel;
    }
  }

  if(bigBtn){
    bigBtn.addEventListener('click', () => picker.click());
  }
  picker.addEventListener('change', () => {
    const photo = chosenFile();
    if(photo){
      preview(photo);
    }
  });
  runBtn.addEventListener('click', onRunClick);
  console.log('[LabelScan] Gadget gebunden.');
}
// Bind right away when the document has already been parsed; otherwise wait
// for DOMContentLoaded.
if(document.readyState !== 'loading'){
  bind();
} else {
  document.addEventListener('DOMContentLoaded', bind);
}
})();