MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen

Keine Bearbeitungszusammenfassung
Markierung: Zurückgesetzt
Keine Bearbeitungszusammenfassung
Markierung: Manuelle Zurücksetzung
Zeile 3: Zeile 3:
   'use strict';
   'use strict';


   // ========= KONFIG =========
   // =============================
   // Wenn du enger in Kategorien suchen willst, trage sie hier ein.
  //  KONFIGURATION
   // =============================
 
  // ← Für Tests leer lassen: const ADOS_CATEGORIES = [];
   const ADOS_CATEGORIES = [
   const ADOS_CATEGORIES = [
     'Alle A Dream of Scotland Abfüllungen',
     'Alle A Dream of Scotland Abfüllungen',
Zeile 15: Zeile 18:
   ];
   ];


  // Wörter, die oft auf ADOS-Labels stehen und uns beim Fuzzy-Match helfen
   const KNOWN_TOKENS = [
   const KNOWN_TOKENS = [
     // Marken/Distilleries (Auszug – erweiterbar)
     // Serien / Reihen
    'The Tasteful 8', 'Heroes of Childhood', 'A Dream of Scotland',
    'A Dream of Ireland', 'The Fine Art of Whisky', 'Friendly Mr. Z',
    'Die Whisky Elfen', 'Rumbastic',
 
    // Brennereien / Regionen
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Bunnahabhain','Springbank','Caperdonich','Linkwood','Glen Scotia',
     'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland','Irland',
    // Serien/ADOS-Sprache
 
    'A Dream of Scotland','A Dream of Ireland','The Tasteful 8','Heroes of Childhood',
    // typische Label-Wörter
     'Cask Strength','Single Malt','Unicorn','Space Girls','Whisky Elfen',
     'Cask Strength','First Fill','Bourbon Barrel','Sherry','PX','Oloroso'
    'The Fine Art of Whisky','Friendly Mr. Z','Rumbastic'
   ];
   ];


   // ========= UI HILFSFUNKTIONEN =========
   // =============================
  //  UI-Hilfen
  // =============================
 
   function hasUI () {
   function hasUI () {
     return !!document.getElementById('ados-scan-run') &&
     return !!document.getElementById('ados-scan-run') &&
           !!document.getElementById('ados-scan-file');
           !!document.getElementById('ados-scan-file');
   }
   }
   function setStatus (t) {
   function setStatus (t) {
     const el = document.getElementById('ados-scan-status');
     var el = document.getElementById('ados-scan-status');
     if (el) el.textContent = t || '';
     if (el) el.textContent = t || '';
   }
   }
   function setProgress (p) {
   function setProgress (p) {
     const bar = document.getElementById('ados-scan-progress');
     var bar = document.getElementById('ados-scan-progress');
     if (!bar) return;
     if (!bar) return;
     if (p == null) { bar.hidden = true; bar.value = 0; }
     if (p == null) { bar.hidden = true; bar.value = 0; }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
   }
   }
   function showPreview (file) {
   function showPreview (file) {
     const url = URL.createObjectURL(file);
     var url = URL.createObjectURL(file);
     const prev = document.getElementById('ados-scan-preview');
     var prev = document.getElementById('ados-scan-preview');
     if (prev) {
     if (prev) {
       prev.innerHTML = '<img alt="Vorschau" src="' + url + '">';
       prev.innerHTML = '<img alt="Vorschau" src="' + url + '">';
Zeile 51: Zeile 63:
     }
     }
   }
   }
  const dbg = (msg) => { try { console.log('[LabelScan]', msg); } catch(e){} };


   // ========= TESSERACT WORKER (einmalig) =========
  function showOCRText (t) {
   let workerPromise = null;
    var el = document.getElementById('ados-scan-ocr');
   function ensureWorker () {
    if (el) el.textContent = (t || '').trim();
     if (workerPromise) return workerPromise;
  }
     workerPromise = new Promise((resolve, reject) => {
 
       if (window.Tesseract && Tesseract.createWorker) {
   // =============================
        const worker = Tesseract.createWorker({
  //  Tesseract laden (nur 1x)
          logger: m => {
  // =============================
            if (m?.status === 'recognizing text' && typeof m.progress === 'number') {
 
              setProgress(m.progress);
   var tesseractReady;
            }
   function ensureTesseract () {
          }
     if (tesseractReady) return tesseractReady;
        });
     tesseractReady = new Promise(function (resolve, reject) {
        (async () => {
       if (window.Tesseract) return resolve();
          try {
      var s = document.createElement('script');
            await worker.load();
      s.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
            await worker.loadLanguage('eng+deu');  // englisch + deutsch
      s.async = true;
            await worker.initialize('eng+deu');
      s.onload = resolve;
            // OCR-Parameter: eher „Block Text“
      s.onerror = function () {
            await worker.setParameters({
         var s2 = document.createElement('script');
              tessedit_pageseg_mode: '6', // PSM 6: ein Block mit Text
         s2.src = 'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js';
              preserve_interword_spaces: '1',
         s2.async = true;
              user_defined_dpi: '300'
         s2.onload = resolve;
            });
         s2.onerror = function () { reject(new Error('Tesseract konnte nicht geladen werden')); };
            resolve(worker);
         document.head.appendChild(s2);
          } catch (e) {
       };
            reject(e);
      document.head.appendChild(s);
          }
        })();
      } else {
         // Fallback: Bibliothek nachladen
        const s = document.createElement('script');
         s.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
         s.async = true;
         s.onload = () => {
          if (!Tesseract?.createWorker) return reject(new Error('Tesseract lädt, aber createWorker fehlt'));
          resolve(ensureWorker());
        };
         s.onerror = () => reject(new Error('Tesseract konnte nicht geladen werden'));
         document.head.appendChild(s);
       }
     });
     });
     return workerPromise;
     return tesseractReady;
   }
   }


   // ========= BILD-VORVERARBEITUNG =========
   // =============================
   function toCanvasFromImage (img, maxSide) {
   //  Vorverarbeitung (OCR)
     const MAX = maxSide || 1800;
  //  Graustufen + Unsharp + adaptive Schwelle
     const scale = Math.min(1, (img.width > img.height) ? (MAX / img.width) : (MAX / img.height));
  // =============================
     const w = Math.max(1, Math.round(img.width * scale));
 
    const h = Math.max(1, Math.round(img.height * scale));
  async function preprocessImage (file) {
    const c = document.createElement('canvas');
    const img = await new Promise((res, rej) => {
    c.width = w; c.height = h;
      const o = new Image();
    const ctx = c.getContext('2d', { willReadFrequently: true });
      o.onload = () => res(o);
    ctx.imageSmoothingEnabled = true;
      o.onerror = rej;
    ctx.drawImage(img, 0, 0, w, h);
      o.src = URL.createObjectURL(file);
    return c;
    });
  }
 
     const MAX = 1800;
     const s = Math.min(1, (img.width > img.height) ? MAX / img.width : MAX / img.height);
     const w = Math.round(img.width * s), h = Math.round(img.height * s);


  function grayscaleContrastUnsharp (canvas) {
    const c = document.createElement('canvas'); c.width = w; c.height = h;
     const ctx = canvas.getContext('2d', { willReadFrequently: true });
     const g = c.getContext('2d', { willReadFrequently: true });
     const { width: w, height: h } = canvas;
     g.imageSmoothingEnabled = true;
     const id = ctx.getImageData(0, 0, w, h);
     g.drawImage(img, 0, 0, w, h);
    const d = id.data;


     // 1) Graustufen + Kontrast
     // Graustufen
     let min=255, max=0;
     let id = g.getImageData(0, 0, w, h), d = id.data;
    const gray = new Uint8ClampedArray(w*h);
     for (let i=0;i<d.length;i+=4){
     for (let i=0, j=0; i<d.length; i+=4, j++) {
       const y = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
       const g = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
       d[i]=d[i+1]=d[i+2]=y;
       gray[j] = g;
      if (g<min) min=g; if (g>max) max=g;
    }
    const span = Math.max(1, max-min);
    for (let j=0; j<gray.length; j++) {
      let v = (gray[j]-min)/span;          // 0..1
      v = Math.pow(v, 0.9);                // leichte Gamma-Korrektur
      gray[j] = Math.max(0, Math.min(255, Math.round(v*255)));
     }
     }
    g.putImageData(id, 0, 0);


     // 2) Leichtes Unsharp Mask
     // → Unsharp (leichter Hochpass)
     // einfacher 3x3-Box-Blur und dann Original + Amount*(Original-Blur)
     id = g.getImageData(0,0,w,h); d = id.data;
     const blur = new Uint8ClampedArray(gray.length);
     const copy = new Uint8ClampedArray(d);
     const off = [-w-1,-w,-w+1, -1,0,1, w-1,w,w+1];
     const idx = (x,y)=>4*(y*w+x);
     for (let y=1;y<h-1;y++){
     for (let y=1;y<h-1;y++){
       for (let x=1;x<w-1;x++){
       for (let x=1;x<w-1;x++){
         let s=0;
         const i0=idx(x,y), a=copy[i0], b=copy[idx(x-1,y)], c0=copy[idx(x+1,y)],
         const idx=y*w+x;
              d0=copy[idx(x,y-1)], e=copy[idx(x,y+1)];
         for (let k=0;k<9;k++) s += gray[idx+off[k]];
         const lap = 4*a - b - c0 - d0 - e;
        blur[idx] = s/9;
         const v = Math.max(0, Math.min(255, a + 0.3*lap));
        d[i0]=d[i0+1]=d[i0+2]=v;
       }
       }
     }
     }
     const amount=0.65;
     g.putImageData(id,0,0);
    for (let i=0;i<gray.length;i++){
      let v = gray[i] + amount*(gray[i] - (blur[i]||gray[i]));
      gray[i] = v<0?0:v>255?255:v;
    }


     // 3) Adaptive Schwelle light (global + lokale Korrektur)
     // → adaptive Schwelle (lokaler Mittelwert)
     // global threshold um den Mittelwert, dann leichte Aufhellung dunkler Zeichen
    const win = 25, half = (win|0);
     let sum=0; for (let i=0;i<gray.length;i++) sum+=gray[i];
     id = g.getImageData(0,0,w,h); d = id.data;
    const mean = sum/gray.length;
     for (let y=0;y<h;y++){
    for (let i=0, p=0; i<d.length; i+=4, p++) {
      for (let x=0;x<w;x++){
      const v = gray[p] < mean*0.97 ? 0 : 255; // binär
        let sum=0, cnt=0;
      d[i]=d[i+1]=d[i+2]=v; d[i+3]=255;
        for (let yy=Math.max(0,y-half); yy<=Math.min(h-1,y+half); yy+=5){
          for (let xx=Math.max(0,x-half); xx<=Math.min(w-1,x+half); xx+=5){
            sum += d[4*(yy*w+xx)];
            cnt++;
          }
        }
        const thr = (sum/cnt) - 6;
        const i = 4*(y*w+x);
        const v = d[i] < thr ? 0 : 255;
        d[i]=d[i+1]=d[i+2]=v;
      }
     }
     }
     ctx.putImageData(id, 0, 0);
     g.putImageData(id,0,0);
    return canvas;
  }


  function rotateCanvas (src, deg) {
    const rad = deg * Math.PI/180;
    const w = src.width, h = src.height;
    const swap = (deg % 180) !== 0;
    const c = document.createElement('canvas');
    c.width = swap ? h : w;
    c.height = swap ? w : h;
    const ctx = c.getContext('2d');
    ctx.translate(c.width/2, c.height/2);
    ctx.rotate(rad);
    ctx.drawImage(src, -w/2, -h/2);
     return c;
     return c;
   }
   }


   function cropCanvas (src, x, y, w, h) {
  // Hilfsfunktionen für Varianten
     const c = document.createElement('canvas');
   function crop(canvas, x, y, w, h){
    c.width = w; c.height = h;
     const c = document.createElement('canvas'); c.width=w; c.height=h;
     c.getContext('2d').drawImage(src, x, y, w, h, 0, 0, w, h);
     c.getContext('2d').drawImage(canvas, x, y, w, h, 0, 0, w, h);
     return c;
     return c;
  }
  function rotate(canvas, deg){
    const r = document.createElement('canvas');
    const ctx = r.getContext('2d');
    if (deg % 180 === 0){ r.width=canvas.width; r.height=canvas.height; }
    else { r.width=canvas.height; r.height=canvas.width; }
    ctx.translate(r.width/2, r.height/2);
    ctx.rotate(deg*Math.PI/180);
    ctx.drawImage(canvas, -canvas.width/2, -canvas.height/2);
    return r;
   }
   }


   function buildCandidates (base) {
   async function ocrOne(canvas, lang) {
    // Vollbild + zentrale & untere Bänder, je Rotation 0/+90/-90
     const res = await Tesseract.recognize(canvas, lang, {
    const L = [];
       // Sparse text funktioniert bei Labels (verschieden orientierte Textblöcke)
     const rotations = [0, 90, -90];
       tessedit_pageseg_mode: 11,
    rotations.forEach((deg) => {
       preserve_interword_spaces: 1
      const r = deg ? rotateCanvas(base, deg) : base;
      const w = r.width, h = r.height;
      const full = grayscaleContrastUnsharp(r.cloneNode ? r.cloneNode(true) : r);
      L.push(full);
 
       // zentral ~70% Bereich
      const cw = Math.round(w*0.8), ch = Math.round(h*0.7);
       const cx = Math.round((w-cw)/2), cy = Math.round((h-ch)/2);
       L.push(grayscaleContrastUnsharp(cropCanvas(r, cx, cy, cw, ch)));
 
      // unteres Band (viele ADOS haben unten Textblöcke)
      const bh = Math.round(h*0.38);
      L.push(grayscaleContrastUnsharp(cropCanvas(r, 0, h-bh, w, bh)));
     });
     });
     return L;
     return { text: (res?.data?.text||'').trim(), conf: res?.data?.confidence||0 };
   }
   }


   // ========= OCR PIPELINE =========
   // =============================
   async function runOCR (file) {
   //  Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache
    const worker = await ensureWorker();
  // =============================
    setProgress(0);


     // Bild laden → Canvas → Kandidaten erzeugen
  async function runOCR(file){
     const img = await new Promise((res, rej) => {
    await ensureTesseract();
      const o = new Image();
     setProgress(0.01);
      o.onload = () => res(o);
     const base = await preprocessImage(file);
      o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
 
      o.src = URL.createObjectURL(file);
    // Kandidatenflächen
    });
    const variants = [];
    const base = toCanvasFromImage(img, 1800);
    variants.push(base); // komplett
    const candidates = buildCandidates(base);
    variants.push(crop(base, 0, 0, Math.round(base.width*0.4), base.height)); // linke Spalte
    variants.push(crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28))); // unteres Banner


     let best = { text: '', conf: 0 };
     // + Rotationen
     // Zwei PSM-Modi probieren (6 → Block, 7 → eine Textzeile – robust gegen plakative Schriften)
    const more = [];
     const PSMs = ['6','7'];
     for (const v of variants){
      more.push(v, rotate(v, 90), rotate(v, -90));
     }


     for (const c of candidates) {
    // zwei Sprachmodi testen
       for (const psm of PSMs) {
    const results = [];
     for (const canv of more){
       for (const lang of ['deu+eng','eng']){
         try {
         try {
          await worker.setParameters({ tessedit_pageseg_mode: psm });
           const r = await ocrOne(canv, lang);
           const { data } = await worker.recognize(c);
           results.push(r);
          const text = data?.text ? String(data.text) : '';
         } catch(e){ /* einzelne Fehlschläge ignorieren */ }
          const conf = (data?.confidence || 0);
          // Heuristik: genug Buchstaben?
          const letters = (text.match(/[A-Za-zÄÖÜäöüß]{2,}/g) || []).length;
          const score = conf + letters*1.5;
           if (score > (best.conf + (best.letters||0)*1.5)) {
            best = { text, conf, letters };
          }
          // Wenn sehr gut: früh abbrechen
          if (conf > 75 && letters > 15) break;
         } catch (e) {
          // einfach nächsten Kandidaten probieren
        }
       }
       }
     }
     }
     setProgress(null);
     setProgress(null);


     // Debug-Ausgabe
     results.sort((a,b)=> (b.conf||0)-(a.conf||0));
    const dbgEl = document.getElementById('ados-scan-ocr');
     return (results[0]?.text)||'';
     if (dbgEl) dbgEl.textContent = best.text || '(kein Text erkannt)';
  }


    return best.text || '';
  // =============================
   }
  //  Hinweise aus OCR
   // =============================


  // ========= HINWEISE EXTRAHIEREN & SUCHE =========
   function extractHints (text) {
   function extractHints (text) {
     const raw = String(text || '').replace(/\s+/g, ' ').trim();
     const raw = String(text || '').replace(/\s+/g, ' ').trim();


     const names = [];
     const foundNames = [];
     KNOWN_TOKENS.forEach(t => {
     KNOWN_TOKENS.forEach(t => {
       const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
       const re = new RegExp('\\b' + t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '\\b', 'i');
       if (re.test(raw)) names.push(t);
       if (re.test(raw)) foundNames.push(t);
     });
     });


     const ages = [];
     const ages = [];
     let m; const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     while ((m = ageRe.exec(raw)) !== null) { const n = m[1]; if (!ages.includes(n)) ages.push(n); }
    let m;
     while ((m = ageRe.exec(raw)) !== null) {
      const n = m[1]; if (!ages.includes(n)) ages.push(n);
    }


     const years = [];
     const years = [];
     const yearRe = /\b(19|20)\d{2}\b/g;
     const yearRe = /\b(19|20)\d{2}\b/g;
     while ((m = yearRe.exec(raw)) !== null) { const y = m[0]; if (!years.includes(y)) years.push(y); }
     while ((m = yearRe.exec(raw)) !== null) {
      if (!years.includes(m[0])) years.push(m[0]);
    }


    const words = [];
    const seen = new Set();
     const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
     const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
     while ((m = wordRe.exec(raw)) !== null) { const w = m[0]; if (!seen.has(w)) { seen.add(w); words.push(w); if (words.length>=8) break; } }
    const uniq = new Set(); let w; const words = [];
     while ((w = wordRe.exec(raw)) !== null) {
      const s = w[0];
      if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; }
    }


     return { names, ages, years, words, raw };
     return { names: foundNames, ages, years, words, raw };
  }
 
  // =============================
  //  Suche (3 Pässe) + Fallbacks
  // =============================
 
  function esc (s) { return mw.html.escape(String(s || '')); }
 
  function incatStr () {
    return (ADOS_CATEGORIES || []).map(c => 'incategory:"' + c + '"').join(' ');
   }
   }


   async function searchWikiSmart (hints, limit) {
   async function searchWikiSmart (hints, limit) {
     await mw.loader.using('mediawiki.api');
     await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']);
     const api = new mw.Api();
     const api = new mw.Api();
     const ns0 = 0, MAX = limit || 12;
     const ns0 = 0;
 
    const MAX = limit || 12;
    const incats = ADOS_CATEGORIES.length
      ? ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ')
      : '';
 
    const buckets = [];


     // PASS 1: sehr eng (intitle + Kategorien)
     // PASS 1: intitle-Kombis (präzise)
    const pass1 = [];
     if (hints.names.length) {
     if (hints.names.length) {
       hints.names.forEach(n => {
       hints.names.forEach(n => {
         if (hints.ages.length) hints.ages.forEach(a => buckets.push(`intitle:"${n}" intitle:${a} ${incats}`.trim()));
         if (hints.ages.length) hints.ages.forEach(a => pass1.push(`intitle:"${n}" intitle:${a} ${incatStr()}`));
         if (hints.years.length) hints.years.forEach(y => buckets.push(`intitle:"${n}" "${y}" ${incats}`.trim()));
         if (hints.years.length) hints.years.forEach(y => pass1.push(`intitle:"${n}" "${y}" ${incatStr()}`));
         buckets.push(`intitle:"${n}" ${incats}`.trim());
         pass1.push(`intitle:"${n}" ${incatStr()}`);
       });
       });
     }
     }
    // PASS 2: keyword-bündel
    const key = [].concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
                  .map(x => `"${x}"`).join(' ');
    if (key) buckets.push(`${key} ${incats}`.trim());


     // PASS 3: ohne Kategorien (breiter Fallback)
     // PASS 2: gewichtete Volltextsuche
     if (ADOS_CATEGORIES.length) {
     const key = []
      if (hints.names.length) hints.names.forEach(n => buckets.push(`intitle:"${n}"`));
      .concat(hints.names.slice(0, 2), hints.ages.slice(0, 1), hints.years.slice(0, 1), hints.words.slice(0, 3))
      if (key) buckets.push(key);
      .map(x => `"${x}"`).join(' ');
    }
    const pass2 = key ? [ `${key} ${incatStr()}` ] : [];


     // PASS 4: Prefix
     // PASS 3: Prefix auf Titel
     if (hints.names.length) buckets.push(hints.names[0]);
    const pass3 = [];
     else if (hints.words.length) buckets.push(hints.words[0]);
     if (hints.names.length) pass3.push(hints.names[0]);
     if (!pass3.length && hints.words.length) pass3.push(hints.words[0]);


     const seen = new Set(); const out = [];
     const seen = new Set(); const out = [];
     async function runSearch (q) {
 
      if (!q) return;
     async function runSr (q) {
       const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace: ns0, srlimit: MAX, formatversion:2 });
       const r = await api.get({ action: 'query', list: 'search', srsearch: q, srnamespace: ns0, srlimit: MAX, formatversion: 2 });
       (r.query?.search || []).forEach(it => { if (!seen.has(it.title)) { seen.add(it.title); out.push(it); } });
       (r.query?.search || []).forEach(it => {
        const k = it.title;
        if (seen.has(k)) return;
        seen.add(k);
        out.push(it);
      });
     }
     }
     for (const q of buckets) {
 
       await runSearch(q);
    for (const q of pass1) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
     for (const q of pass2) { await runSr(q); if (out.length >= MAX) return out.slice(0, MAX); }
 
    // Prefix (list=prefixsearch)
    for (const p of pass3) {
       const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
      (r.query?.prefixsearch || []).forEach(it => {
        const title = it.title || it['*'];
        const k = title;
        if (seen.has(k)) return;
        seen.add(k);
        out.push({ title, snippet: '' });
      });
       if (out.length >= MAX) break;
       if (out.length >= MAX) break;
     }
     }
    if (out.length < MAX) {
 
      // Prefix-Fallback
      const p = hints.names[0] || hints.words[0] || '';
      if (p) {
        const r = await api.get({ action:'query', list:'prefixsearch', pssearch:p, psnamespace: ns0, pslimit: MAX });
        (r.query?.prefixsearch || []).forEach(it => {
          const title = it.title || it['*'];
          if (!seen.has(title)) { seen.add(title); out.push({ title, snippet:'' }); }
        });
      }
    }
     return out.slice(0, MAX);
     return out.slice(0, MAX);
   }
   }


   function esc (s) { return mw.html.escape(String(s||'')); }
  // ganz einfacher Fuzzy-Fallback auf Suchergebnissen
   function scoreTitle(title, hints){
    const t = String(title||'').toLowerCase();
    let s = 0;
    hints.names.forEach(n => { if (t.includes(n.toLowerCase())) s += 1.0; });
    hints.words.forEach(n => { if (t.includes(n.toLowerCase())) s += 0.4; });
    hints.ages.forEach(a => { if (t.includes(String(a))) s += 0.4; });
    hints.years.forEach(y => { if (t.includes(String(y))) s += 0.4; });
    return s;
  }
 
  async function fallbackFuzzyTitles(hints, limit){
    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const MAX = limit || 12;
 
    // Breite Suche mit Tokens (mit/ohne Kategorie)
    const q1 = []
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
      .map(x => `"${x}"`).join(' ');
    const q = `${q1} ${incatStr()}`.trim();
 
    const r = await api.get({ action:'query', list:'search', srsearch:q || hints.raw.split(/\s+/).slice(0,6).join(' '), srlimit:50, formatversion:2 });
    const items = (r.query?.search || []);
    const scored = items.map(it => ({ ...it, _score: scoreTitle(it.title, hints) }));
    scored.sort((a,b)=> b._score - a._score);
    const top = scored.slice(0, MAX).filter(x=> x._score >= 0.10); // großzügiger
    return top;
  }
 
  async function broadSearchNoCategory(hints, limit){
    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const MAX = limit || 12;
 
    const parts = []
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
      .map(x => `"${x}"`);
    const q = parts.length ? parts.join(' ') : hints.raw.split(/\s+/).slice(0,6).join(' ');
 
    const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:MAX, formatversion:2 });
    return (r.query?.search || []);
  }
 
  // =============================
  //  Ergebnisse rendern
  // =============================
 
   function renderResults (items) {
   function renderResults (items) {
     const box = document.getElementById('ados-scan-results');
     var box = document.getElementById('ados-scan-results');
     if (!box) return;
     if (!box) return;
     box.innerHTML = '';
     box.innerHTML = '';
Zeile 348: Zeile 386:
       return;
       return;
     }
     }
     items.slice(0, 12).forEach(it => {
     items.slice(0, 12).forEach(function (it) {
       const title = it.title || '';
       var title = it.title || '';
       const link = mw.util.getUrl(title.replace(/ /g,'_'));
       var link = mw.util.getUrl(title.replace(/ /g, '_'));
       const snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       const div = document.createElement('div');
       var div = document.createElement('div');
       div.className = 'ados-hit';
       div.className = 'ados-hit';
       div.innerHTML =
       div.innerHTML =
         '<b><a href="'+link+'">'+esc(title)+'</a></b>' +
         '<b><a href="' + link + '">' + esc(title) + '</a></b>' +
         (snip ? '<div class="meta">'+snip+'</div>' : '');
         (snip ? '<div class="meta">' + snip + '</div>' : '');
       box.appendChild(div);
       box.appendChild(div);
     });
     });
   }
   }


   // ========= EVENT-BINDING =========
   // =============================
   let BOUND=false;
  //  Binding
   function bind() {
  // =============================
 
   var BOUND = false;
   function bind () {
     if (BOUND || !hasUI()) return;
     if (BOUND || !hasUI()) return;
     const runBtn = document.getElementById('ados-scan-run');
 
     const bigBtn = document.getElementById('ados-scan-bigbtn');
     var runBtn = document.getElementById('ados-scan-run');
     const fileIn = document.getElementById('ados-scan-file');
     var fileIn = document.getElementById('ados-scan-file');
    var bigBtn = document.getElementById('ados-scan-bigbtn');
     var form = document.getElementById('ados-scan-form');
 
     if (!runBtn || !fileIn) return;
     if (!runBtn || !fileIn) return;
     BOUND=true;
     if (runBtn.dataset.bound === '1') return;
    runBtn.dataset.bound = '1'; BOUND = true;


     if (bigBtn) bigBtn.addEventListener('click', () => fileIn.click());
     if (bigBtn) bigBtn.addEventListener('click', function () { fileIn.click(); });
     fileIn.addEventListener('change', function(){
     fileIn.addEventListener('change', function () {
       if (this.files && this.files[0]) showPreview(this.files[0]);
       if (this.files && this.files[0]) showPreview(this.files[0]);
     });
     });


     runBtn.addEventListener('click', async function (ev) {
     function onSubmit(ev){
       ev.preventDefault();
       ev.preventDefault();
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       try {
       var f = fileIn.files[0];
        runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
      (async function(){
        setStatus('Vorverarbeitung & Texterkennung …');
        try {
        const text = await runOCR(fileIn.files[0]);
          runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
          setStatus('Erkenne Label …');
          const text = await runOCR(f);
          showOCRText(text);


        setStatus('Suche im Wiki …');
          setStatus('Suche im Wiki …');
        const hints = extractHints(text);
          const hints = extractHints(text);
        const hits  = await searchWikiSmart(hints, 12);


        renderResults(hits);
          let hits = await searchWikiSmart(hints, 12);
        setStatus('Fertig.');
          if (!hits || !hits.length) {
      } catch (e) {
            setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
        console.error('[LabelScan]', e);
            hits = await fallbackFuzzyTitles(hints, 12);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
          }
      } finally {
          if (!hits || !hits.length) {
        runBtn.disabled = false; runBtn.textContent = '🔍 Erkennen & suchen';
            setStatus('Kein Treffer – breite Suche ohne Kategorien …');
       }
            hits = await broadSearchNoCategory(hints, 12);
     });
          }
 
          renderResults(hits);
          setStatus('Fertig.');
        } catch (e) {
          console.error('[LabelScan]', e);
          setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
        } finally {
          runBtn.disabled = false; runBtn.textContent = 'Erkennen & suchen';
        }
       })();
     }
 
    runBtn.addEventListener('click', onSubmit);
    if (form) form.addEventListener('submit', onSubmit);
 
    // Sicherheit
    var wrap = document.getElementById('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    runBtn.style.position = 'relative';
    runBtn.style.zIndex = '9999';
    runBtn.style.pointerEvents = 'auto';
   }
   }


   if (document.readyState === 'loading') document.addEventListener('DOMContentLoaded', bind);
   if (document.readyState === 'loading') {
   else bind();
    document.addEventListener('DOMContentLoaded', bind);
   setTimeout(bind, 250); setTimeout(bind, 1000);
   } else {
   new MutationObserver(() => { if (!BOUND) bind(); })
    bind();
    .observe(document.documentElement || document.body, { childList:true, subtree:true });
  }
   setTimeout(bind, 250);
  setTimeout(bind, 1000);
   var mo = new MutationObserver(function () { if (!BOUND) bind(); });
  mo.observe(document.documentElement || document.body, { childList: true, subtree: true });


})();
})();