MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen

Keine Bearbeitungszusammenfassung
Keine Bearbeitungszusammenfassung
Zeile 1: Zeile 1:
/* global mw, Tesseract */
/* global mw, Tesseract */
(function(){
(function () {
   'use strict';
   'use strict';


   // Lädt Gadget nur, wenn der Scan-Block vorhanden ist.
   // === ADOS: Kategorien & Tokens (ggf. erweitern) ===========================
   function hasUI(){
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];
 
  const KNOWN_TOKENS = [
    'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
    'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
    'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
    'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland'
  ];
 
  // === UI Präsenz ===========================================================
   function hasUI () {
     return !!document.getElementById('ados-scan-run') &&
     return !!document.getElementById('ados-scan-run') &&
           !!document.getElementById('ados-scan-file');
           !!document.getElementById('ados-scan-file');
   }
   }


   // --- UI Helpers ---
   // === UI Helpers ===========================================================
   function setStatus(t){ var el = document.getElementById('ados-scan-status'); if(el) el.textContent = t || ''; }
   function setStatus (t) {
   function setProgress(p){
    var el = document.getElementById('ados-scan-status');
    if (el) el.textContent = t || '';
  }
   function setProgress (p) {
     var bar = document.getElementById('ados-scan-progress');
     var bar = document.getElementById('ados-scan-progress');
     if(!bar) return;
     if (!bar) return;
     if(p == null){ bar.style.display='none'; bar.value=0; }
     if (p == null) { bar.hidden = true; bar.value = 0; }
     else { bar.style.display=''; bar.value = Math.max(0, Math.min(1, p)); }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
   }
   }
   function showPreview(file){
   function showPreview (file) {
     var url = URL.createObjectURL(file);
     var url = URL.createObjectURL(file);
     var prev = document.getElementById('ados-scan-preview');
     var prev = document.getElementById('ados-scan-preview');
     if(prev){
     if (prev) {
       prev.innerHTML =
       prev.innerHTML = '<img alt="Vorschau" src="' + url + '">';
        '<img alt="Vorschau" src="'+url+'">';
       prev.setAttribute('aria-hidden', 'false');
       prev.setAttribute('aria-hidden','false');
     }
     }
   }
   }


   // --- Tesseract nur bei Bedarf laden ---
   // === Tesseract bei Bedarf laden ==========================================
   var tesseractReady;
   var tesseractReady;
   function ensureTesseract(){
   function ensureTesseract () {
     if (tesseractReady) return tesseractReady;
     if (tesseractReady) return tesseractReady;
     tesseractReady = new Promise(function(resolve, reject){
     tesseractReady = new Promise(function (resolve, reject) {
       if (window.Tesseract) return resolve();
       if (window.Tesseract) return resolve();
       var s = document.createElement('script');
       var s = document.createElement('script');
Zeile 37: Zeile 57:
       s.async = true;
       s.async = true;
       s.onload = resolve;
       s.onload = resolve;
       s.onerror = function(){
       s.onerror = function () {
         var s2 = document.createElement('script');
         var s2 = document.createElement('script');
         s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
         s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
         s2.async = true;
         s2.async = true;
         s2.onload = resolve;
         s2.onload = resolve;
         s2.onerror = function(){ reject(new Error('Tesseract konnte nicht geladen werden')); };
         s2.onerror = function () { reject(new Error('Tesseract konnte nicht geladen werden')); };
         document.head.appendChild(s2);
         document.head.appendChild(s2);
       };
       };
Zeile 50: Zeile 70:
   }
   }


   // --- OCR + Heuristik ---
   // === Bild-Vorverarbeitung (für bessere OCR) ===============================
   async function runOCR(file){
  async function preprocessImage (file) {
    const img = await new Promise((res, rej) => {
      const o = new Image();
      o.onload = () => res(o);
      o.onerror = rej;
      o.src = URL.createObjectURL(file);
    });
 
    // Längste Kante auf ~1800px skalieren (schärfer für OCR)
    const MAX = 1800;
    const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
    const w = Math.round(img.width * scale);
    const h = Math.round(img.height * scale);
 
    const c = document.createElement('canvas');
    c.width = w; c.height = h;
    const ctx = c.getContext('2d');
    ctx.imageSmoothingEnabled = true;
    ctx.drawImage(img, 0, 0, w, h);
 
    // Graustufen + leichter Kontrastboost
    const id = ctx.getImageData(0, 0, w, h);
    const d = id.data;
    for (let i = 0; i < d.length; i += 4) {
      const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
      const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
      d[i] = d[i + 1] = d[i + 2] = v;
    }
    ctx.putImageData(id, 0, 0);
 
    return c; // Canvas an Tesseract übergeben
  }
 
  // === OCR (nutzt Vorverarbeitung) =========================================
   async function runOCR (file) {
     await ensureTesseract();
     await ensureTesseract();
     setProgress(0);
     setProgress(0);
     var res = await Tesseract.recognize(file,'deu+eng',{
     const canvas = await preprocessImage(file);
       logger: function(m){
 
         if(m && m.status === 'recognizing text' && typeof m.progress === 'number'){
    const res = await Tesseract.recognize(canvas, 'deu+eng', {
      // psm 6: ein Block Text – robust für Label
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
       logger: function (m) {
         if (m && m.status === 'recognizing text' && typeof m.progress === 'number') {
           setProgress(m.progress);
           setProgress(m.progress);
         }
         }
       }
       }
     });
     });
     setProgress(null);
     setProgress(null);
     return (res && res.data && res.data.text) || '';
     return (res && res.data && res.data.text) || '';
   }
   }


   function extractHints(text){
  // === Hinweise aus OCR extrahieren ========================================
     var raw = String(text||'').replace(/\s+/g,' ').trim();
   function extractHints (text) {
     const raw = String(text || '').replace(/\s+/g, ' ').trim();


     var wordRe = new RegExp('\\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\\-]{3,}\\b','g');
     // Distillery-/Marken-Token, die wirklich im Text vorkommen
    var w = []; var m;
    const foundNames = [];
    while ((m = wordRe.exec(raw)) !== null) { if (w.indexOf(m[0]) < 0) w.push(m[0]); }
    KNOWN_TOKENS.forEach(t => {
     w = w.slice(0, 6);
      const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
      if (re.test(raw)) foundNames.push(t);
     });


     var ageRe = new RegExp('\\b([1-9]\\d?)\\s?(?:years?|yo|jahr|jahre)\\b','gi');
     // Alter: 12 years, 12 yo, 12-year-old, 14 Jahre
     var ages=[]; while ((m = ageRe.exec(raw)) !== null) { var n=(m[0].match(/[1-9]\d?/)||[])[0]; if(n && ages.indexOf(n)<0) ages.push(n); }
    const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     const ages = [];
    let m;
    while ((m = ageRe.exec(raw)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); }


     var yearRe = new RegExp('\\b(19|20)\\d{2}\\b','g');
     // Jahrgänge
     var years=[]; while ((m = yearRe.exec(raw)) !== null) { if (years.indexOf(m[0])<0) years.push(m[0]); }
    const yearRe = /\b(19|20)\d{2}\b/g;
     const years = [];
    while ((m = yearRe.exec(raw)) !== null) { if (!years.includes(m[0])) years.push(m[0]); }


     return { words: w, ages: ages, years: years };
     // ein paar „Promi-Wörter“
  }
    const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
    const uniq = new Set(); let w; const words = [];
    while ((w = wordRe.exec(raw)) !== null) {
      const s = w[0];
      if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; }
    }


  function buildSearchQuery(h){
     return { names: foundNames, ages, years, words, raw };
     var parts = [];
    (h.words||[]).forEach(function(w){ parts.push('"'+w+'"'); });
    (h.ages||[]).forEach(function(a){ parts.push('"'+a+'"'); });
    (h.years||[]).forEach(function(y){ parts.push('"'+y+'"'); });
    if (!parts.length) parts.push('Whisky');
    return parts.join(' ');
   }
   }


   // --- Wiki-Suche ---
   // === Smarte Wiki-Suche (3 Pässe) =========================================
   async function searchWiki(query, limit){
   async function searchWikiSmart (hints, limit) {
     await mw.loader.using('mediawiki.api');
     await mw.loader.using('mediawiki.api');
     var api = new mw.Api();
     const api = new mw.Api();
     var res = await api.get({
     const ns0 = 0;
      action: 'query',
    const MAX = limit || 12;
      list: 'search',
 
      srsearch: query,
    function incatStr () {
       srlimit: limit || 12,
      return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ');
      srwhat: 'text',
    }
       formatversion: 2
 
     });
    // PASS 1: intitle-Kombis (präzise)
     return (res.query && res.query.search) || [];
    const pass1 = [];
    if (hints.names.length) {
      hints.names.forEach(n => {
        if (hints.ages.length) hints.ages.forEach(a => pass1.push(`intitle:"${n}" intitle:${a} ${incatStr()}`));
        if (hints.years.length) hints.years.forEach(y => pass1.push(`intitle:"${n}" "${y}" ${incatStr()}`));
        pass1.push(`intitle:"${n}" ${incatStr()}`);
      });
    }
 
    // PASS 2: gewichtete Volltextsuche
    const key = []
      .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3))
      .map(x => `"${x}"`).join(' ');
    const pass2 = key ? [ `${key} ${incatStr()}` ] : [];
 
    // PASS 3: Prefix auf Titel
    const pass3 = [];
    if (hints.names.length) pass3.push(hints.names[0]);
    if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);
 
    const seen = new Set(); const out = [];
 
    async function runSr (q) {
      const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
       (r.query?.search || []).forEach(it => {
        const k = it.title;
        if (seen.has(k)) return;
        seen.add(k);
        out.push(it);
       });
     }
 
    for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
     for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
 
    // Prefix (list=prefixsearch)
    for (const p of pass3) {
      const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
      (r.query?.prefixsearch || []).forEach(it => {
        const title = it.title || it['*'];
        const k = title;
        if (seen.has(k)) return;
        seen.add(k);
        out.push({ title, snippet: '' });
      });
      if (out.length >= MAX) break;
    }
 
    return out.slice(0, MAX);
   }
   }


   function esc(s){ return mw.html.escape(String(s||'')); }
  // === HTML Escaping & Treffer-Rendering ===================================
   function esc (s) { return mw.html.escape(String(s || '')); }


   function renderResults(items){
   function renderResults (items) {
     var box = document.getElementById('ados-scan-results');
     var box = document.getElementById('ados-scan-results');
     if (!box) return;
     if (!box) return;
     box.innerHTML = '';
     box.innerHTML = '';
     if (!items || !items.length){
     if (!items || !items.length) {
       box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
       box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
       return;
       return;
     }
     }
     items.slice(0,12).forEach(function(it){
     items.slice(0, 12).forEach(function (it) {
       var title = it.title || '';
       var title = it.title || '';
       var link = mw.util.getUrl(title.replace(/ /g,'_'));
       var link = mw.util.getUrl(title.replace(/ /g, '_'));
       var snip = String(it.snippet||'').replace(/<\/?span[^>]*>/g,'').replace(/&quot;/g,'"');
       var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       var div   = document.createElement('div');
       var div = document.createElement('div');
       div.className = 'ados-hit';
       div.className = 'ados-hit';
       div.innerHTML =
       div.innerHTML =
         '<b><a href="'+link+'">'+esc(title)+'</a></b>' +
         '<b><a href="' + link + '">' + esc(title) + '</a></b>' +
         (snip ? '<div class="meta">'+snip+'</div>' : '');
         (snip ? '<div class="meta">' + snip + '</div>' : '');
       box.appendChild(div);
       box.appendChild(div);
     });
     });
   }
   }


   // --- Bindung ---
   // === Binding ==============================================================
   var BOUND = false;
   var BOUND = false;
   function bind(){
   function bind () {
     if (BOUND || !hasUI()) return;
     if (BOUND || !hasUI()) return;
     var runBtn = document.getElementById('ados-scan-run');
     var runBtn = document.getElementById('ados-scan-run');
Zeile 138: Zeile 254:


     if (!runBtn || !fileIn) return;
     if (!runBtn || !fileIn) return;
     if (runBtn.dataset.bound === '1') return;
     if (runBtn.dataset.bound === '1') return;
     runBtn.dataset.bound = '1'; BOUND = true;
     runBtn.dataset.bound = '1'; BOUND = true;


     if (bigBtn) bigBtn.addEventListener('click', function(){ fileIn.click(); });
     if (bigBtn) bigBtn.addEventListener('click', function () { fileIn.click(); });
     fileIn.addEventListener('change', function(){
     fileIn.addEventListener('change', function () {
       if (this.files && this.files[0]) showPreview(this.files[0]);
       if (this.files && this.files[0]) showPreview(this.files[0]);
     });
     });


     runBtn.addEventListener('click', async function(ev){
     runBtn.addEventListener('click', async function (ev) {
       ev.preventDefault();
       ev.preventDefault();
       if (!(fileIn.files && fileIn.files[0])){ alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       var f = fileIn.files[0];
       var f = fileIn.files[0];
       try{
       try {
         runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
         runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
         setStatus('Erkenne Label …');
         setStatus('Erkenne Label …');
         var text = await runOCR(f);
         var text = await runOCR(f);
         setStatus('Suche im Wiki …');
         setStatus('Suche im Wiki …');
         var hints = extractHints(text);
         var hints = extractHints(text);
         var query = buildSearchQuery(hints);
         var hits = await searchWikiSmart(hints, 12);
        const hits = await searchWikiSmart(hints, 12);
         renderResults(hits);
         renderResults(hits);
         setStatus('Fertig.');
         setStatus('Fertig.');
       } catch (e){
       } catch (e) {
         console.error('[LabelScan]', e);
         console.error('[LabelScan]', e);
         setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
         setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
Zeile 177: Zeile 291:
   }
   }


   // Erstbindung + Fallbacks + Observer (auch für Mobile/VE)
   // Erstbindung + Fallbacks + Observer
   if (document.readyState === 'loading'){
   if (document.readyState === 'loading') {
     document.addEventListener('DOMContentLoaded', bind);
     document.addEventListener('DOMContentLoaded', bind);
   } else {
   } else {
Zeile 185: Zeile 299:
   setTimeout(bind, 250);
   setTimeout(bind, 250);
   setTimeout(bind, 1000);
   setTimeout(bind, 1000);
 
   var mo = new MutationObserver(function () { if (!BOUND) bind(); });
   var mo = new MutationObserver(function(){ if (!BOUND) bind(); });
   mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
   mo.observe(document.documentElement || document.body, { childList:true, subtree:true });
})();
})();