|
|
| Zeile 1: |
Zeile 1: |
| /* global mw */ | | /* Einfacher & stabiler Label-Scanner |
| | Kein Worker, keine WASM, läuft in allen Browsern */ |
| | |
| (function () { | | (function () { |
| 'use strict';
| |
|
| |
|
| // ---------- Hilfsfunktionen für UI ---------- | | const btn = document.getElementById("ados-scan-run"); |
| function $(id){ return document.getElementById(id); } | | const fileInput = document.getElementById("ados-scan-file"); |
| function status(t){ const el=$('ados-scan-status'); if(el) el.textContent = t || ''; } | | const statusEl = document.getElementById("ados-scan-status"); |
| function progress(v){ | | const resultsEl = document.getElementById("ados-scan-results"); |
| const bar=$('ados-scan-progress');
| | const previewEl = document.getElementById("ados-scan-preview"); |
| if(!bar) return;
| | |
| if(v == null){ bar.hidden = true; bar.value = 0; }
| | if (!btn || !fileInput) return; |
| else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, v)); } | | |
| | function setStatus(t) { |
| | statusEl.textContent = t || ""; |
| } | | } |
| function preview(file){ | | |
| const p = $('ados-scan-preview');
| | function preview(file) { |
| if(!p) return;
| |
| const url = URL.createObjectURL(file); | | const url = URL.createObjectURL(file); |
| p.innerHTML = '<img alt="Vorschau" style="max-width:100%;height:auto;border-radius:8px" />'; | | previewEl.innerHTML = `<img src="${url}" style="max-width:100%; border-radius:6px;">`; |
| p.querySelector('img').src = url;
| |
| } | | } |
|
| |
|
| // ---------- Tesseract sauber laden (Worker/WASM/CDN) ---------- | | // Tesseract v4 → stabil, kein WASM nötig |
| let _tessReady = null;
| | function loadTesseract() { |
| const TESS_CDN = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/';
| | return new Promise((resolve, reject) => { |
| async function ensureTesseract(){ | | if (window.Tesseract) return resolve(); |
| if(_tessReady) return _tessReady; | | const s = document.createElement("script"); |
| _tessReady = new Promise((resolve, reject) => {
| | s.src = "https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"; |
| if(window.Tesseract) return resolve(); | | s.onload = resolve; |
| | | s.onerror = reject; |
| const s = document.createElement('script'); | |
| s.src = TESS_CDN + 'tesseract.min.js'; | |
| s.async = true;
| |
| s.onload = () => resolve(); | |
| s.onerror = () => reject(new Error('Tesseract.js konnte nicht geladen werden')); | |
| document.head.appendChild(s); | | document.head.appendChild(s); |
| }); | | }); |
| return _tessReady;
| |
| } | | } |
|
| |
|
| // Kleines Canvas-Preprocessing (Grayscale & leichter Kontrast)
| | async function runOCR(file) { |
| async function preprocess(file){ | | await loadTesseract(); |
| const img = await new Promise((res, rej) => { | |
| const i = new Image();
| |
| i.onload = () => res(i);
| |
| i.onerror = rej;
| |
| i.src = URL.createObjectURL(file);
| |
| });
| |
| const MAX = 1800;
| |
| const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
| |
| const w = Math.round(img.width * scale);
| |
| const h = Math.round(img.height * scale);
| |
| const c = document.createElement('canvas');
| |
| c.width = w; c.height = h;
| |
| const ctx = c.getContext('2d');
| |
| ctx.imageSmoothingEnabled = true;
| |
| ctx.drawImage(img, 0, 0, w, h);
| |
|
| |
|
| const id = ctx.getImageData(0,0,w,h), d=id.data; | | return Tesseract.recognize(file, "eng", { |
| for(let i=0;i<d.length;i+=4){
| | logger: m => { /* kein Fortschritt nötig */ } |
| const g = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2]; | | }).then(r => r.data.text || ""); |
| const v = Math.max(0, Math.min(255, (g-128)*1.12 + 128)); // leichte Kontrastanhebung
| |
| d[i]=d[i+1]=d[i+2]=v;
| |
| } | |
| ctx.putImageData(id,0,0);
| |
| return c;
| |
| } | | } |
|
| |
|
| // ---------- OCR ---------- | | // Sucht nach Destillerie & Alter |
| async function runOCR(file){ | | function extractQuery(text) { |
| await ensureTesseract(); | | text = text.replace(/\s+/g, " "); |
| const { createWorker } = window.Tesseract;
| |
| | |
| // Pfade hart setzen, damit Worker/WASM sicher gefunden werden
| |
| const worker = await createWorker({
| |
| workerPath: TESS_CDN + 'worker.min.js',
| |
| langPath: TESS_CDN + 'langs/',
| |
| corePath: TESS_CDN + 'tesseract-core.wasm.js',
| |
| logger: m => {
| |
| if(m && typeof m.progress === 'number'){
| |
| progress(m.progress);
| |
| }
| |
| if(m && m.status) {
| |
| // optionales Debug
| |
| // console.log('[OCR]', m.status, m.progress ?? '');
| |
| }
| |
| }
| |
| });
| |
| | |
| try {
| |
| await worker.loadLanguage('eng+deu');
| |
| await worker.initialize('eng+deu');
| |
|
| |
|
| const canvas = await preprocess(file);
| | const dist = text.match(/\b([A-Z][a-z]{3,})\b/); |
| | const age = text.match(/\b([1-3]?\d)\s?(years?|yo|Jahre)\b/i); |
|
| |
|
| // psm 6 = Block Text; oem 1 = LSTM
| | let parts = []; |
| await worker.setParameters({
| | if (dist) parts.push(dist[1]); |
| tessedit_pageseg_mode: '6',
| | if (age) parts.push(age[1]); |
| preserve_interword_spaces: '1'
| |
| });
| |
|
| |
|
| const result = await worker.recognize(canvas);
| | return parts.join(" "); |
| const text = (result && result.data && result.data.text) ? result.data.text : '';
| |
| return text.trim();
| |
| } finally {
| |
| await worker.terminate();
| |
| progress(null);
| |
| }
| |
| } | | } |
|
| |
|
| // ---------- einfache Treffer-Suche im Wiki ----------
| | async function searchWiki(q) { |
| async function searchTitles(q){ | | await mw.loader.using("mediawiki.api"); |
| await mw.loader.using('mediawiki.api'); | |
| const api = new mw.Api(); | | const api = new mw.Api(); |
| | | const r = await api.get({ |
| // sehr einfache Heuristik: nimm 3–5 „gute“ Wörter aus dem OCR
| | action: "query", |
| const words = String(q || '') | | list: "search", |
| .replace(/[^\p{L}\p{N}\s\-']/gu,' ')
| | srsearch: q, |
| .replace(/\s+/g,' ')
| |
| .trim()
| |
| .split(' ')
| |
| .filter(w => w.length >= 3)
| |
| .slice(0, 6);
| |
| | |
| if(!words.length) return [];
| |
| | |
| const sr = await api.get({
| |
| action: 'query', | |
| list: 'search', | |
| srsearch: words.map(w => `"${w}"`).join(' '), // UND-verkettet | |
| srlimit: 12,
| |
| srnamespace: 0,
| |
| formatversion: 2 | | formatversion: 2 |
| }); | | }); |
| | | return r.query.search || []; |
| return (sr.query && sr.query.search) ? sr.query.search : []; | |
| } | | } |
|
| |
|
| function renderResults(items){ | | function renderResults(list) { |
| const box = $('ados-scan-results');
| | if (!list.length) { |
| if(!box) return;
| | resultsEl.innerHTML = "<div style='opacity:0.6'>Keine Treffer gefunden</div>"; |
| box.innerHTML = '';
| |
| if(!items || !items.length){ | |
| box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>'; | |
| return; | | return; |
| } | | } |
| items.forEach(it => { | | resultsEl.innerHTML = list.map(it => |
| const title = it.title || ''; | | `<div><a href="/wiki/${encodeURIComponent(it.title)}">${it.title}</a></div>` |
| const link = mw.util.getUrl(title.replace(/ /g,'_'));
| | ).join(""); |
| const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g,'').replace(/"/g,'"');
| |
| const div = document.createElement('div');
| |
| div.className = 'ados-hit';
| |
| div.innerHTML = '<b><a href="'+link+'">'+mw.html.escape(title)+'</a></b>' + (snip ? '<div class="meta">'+snip+'</div>' : '');
| |
| box.appendChild(div);
| |
| });
| |
| } | | } |
|
| |
|
| // ---------- Bindings ---------- | | fileInput.addEventListener("change", () => { |
| function bind(){
| | if (fileInput.files[0]) preview(fileInput.files[0]); |
| const run = $('ados-scan-run'); | | }); |
| const file= $('ados-scan-file');
| |
| const big = $('ados-scan-bigbtn');
| |
|
| |
|
| if(!run || !file) return; | | btn.addEventListener("click", async () => { |
| | if (!fileInput.files[0]) return alert("Bitte Bild wählen!"); |
|
| |
|
| if(big){ big.addEventListener('click', () => file.click()); } | | btn.disabled = true; |
| file.addEventListener('change', () => { if(file.files && file.files[0]) preview(file.files[0]); }); | | setStatus("Erkenne Text …"); |
|
| |
|
| run.addEventListener('click', async (ev) => { | | try { |
| ev.preventDefault(); | | const text = await runOCR(fileInput.files[0]); |
| if(!(file.files && file.files[0])){
| | setStatus("Suche im Wiki …"); |
| alert('Bitte zuerst ein Foto auswählen oder aufnehmen.');
| |
| return;
| |
| }
| |
|
| |
|
| const f = file.files[0]; | | const q = extractQuery(text); |
| run.disabled = true;
| | const hits = await searchWiki(q); |
| const old = run.textContent; | |
| run.textContent = 'Erkenne …';
| |
| status('Erkenne Label …');
| |
| progress(0);
| |
|
| |
|
| try { | | renderResults(hits); |
| // 1) OCR
| | setStatus("Fertig ✅"); |
| const text = await runOCR(f);
| | } catch (e) { |
| // Debug-Ausgabe für dich:
| | console.error(e); |
| const dbg = $('ados-scan-ocr');
| | setStatus("Fehler ❗"); |
| if(dbg){ dbg.textContent = text; }
| | } |
|
| |
|
| // 2) Suche
| | btn.disabled = false; |
| status('Suche im Wiki …');
| | }); |
| const hits = await searchTitles(text);
| |
| renderResults(hits);
| |
| status('Fertig.');
| |
| } catch(err){
| |
| console.error('[LabelScan] Fehler:', err);
| |
| status('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
| |
| } finally {
| |
| progress(null);
| |
| run.disabled = false;
| |
| run.textContent = old;
| |
| }
| |
| });
| |
|
| |
|
| console.log('[LabelScan] Gadget gebunden.');
| |
| }
| |
|
| |
| if(document.readyState === 'loading'){
| |
| document.addEventListener('DOMContentLoaded', bind);
| |
| } else {
| |
| bind();
| |
| }
| |
| })(); | | })(); |