Zum Inhalt springen

MediaWiki:Gadget-LabelScan.js: Unterschied zwischen den Versionen

Aus ADOS Wiki
Keine Bearbeitungszusammenfassung
Keine Bearbeitungszusammenfassung
Zeile 3: Zeile 3:
   'use strict';
   'use strict';


   // ========== ADOS: Kategorien & Distillery-Tokens ==========
   // =============================
   const ADOS_CATEGORIES = []; // <- Filter AUS zum Testen
  //  KONFIGURATION
   // =============================


  // ← Für Tests leer lassen: const ADOS_CATEGORIES = [];
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];


   const KNOWN_TOKENS = [
   const KNOWN_TOKENS = [
    // Serien / Reihen
    'The Tasteful 8', 'Heroes of Childhood', 'A Dream of Scotland',
    'A Dream of Ireland', 'The Fine Art of Whisky', 'Friendly Mr. Z',
    'Die Whisky Elfen', 'Rumbastic',
    // Brennereien / Regionen
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
     'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland'
     'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland','Irland',
 
    // typische Label-Wörter
    'Cask Strength','First Fill','Bourbon Barrel','Sherry','PX','Oloroso'
   ];
   ];


   // ========== Utility: Normalisierung & Fuzzy ==========
   // =============================
   function norm(s){
   //   UI-Hilfen
    return String(s||'')
  // =============================
      .toLowerCase()
      .normalize('NFD').replace(/[\u0300-\u036f]/g,'')
      .replace(/[^a-z0-9\s\-]/g,' ')
      .replace(/\s+/g,' ')
      .trim();
  }
 
  function levenshtein(a,b){
    a = norm(a); b = norm(b);
    const m = a.length, n = b.length;
    if (!m) return n; if (!n) return m;
    const dp = new Array(n+1);
    for (let j=0;j<=n;j++) dp[j]=j;
    for (let i=1;i<=m;i++){
      let prev = dp[0], tmp; dp[0]=i;
      for (let j=1;j<=n;j++){
        tmp = dp[j];
        dp[j] = (a[i-1]===b[j-1]) ? prev : 1 + Math.min(prev, dp[j-1], dp[j]);
        prev = tmp;
      }
    }
    return dp[n];
  }
 
  function tokenSet(str){ return new Set(norm(str).split(' ').filter(Boolean)); }
  function overlapScore(a,b){
    const A = tokenSet(a), B = tokenSet(b);
    if (!A.size || !B.size) return 0;
    let inter=0; A.forEach(t=>{ if (B.has(t)) inter++; });
    return inter / Math.max(A.size, B.size);
  }


  function escHTML (s) {
    return String(s ?? '')
      .replace(/&/g,'&amp;')
      .replace(/</g,'&lt;')
      .replace(/>/g,'&gt;')
      .replace(/"/g,'&quot;')
      .replace(/'/g,'&#39;');
  }
  // ========== UI Präsenz & Helfer ==========
   function hasUI () {
   function hasUI () {
     return !!document.getElementById('ados-scan-run') &&
     return !!document.getElementById('ados-scan-run') &&
           !!document.getElementById('ados-scan-file');
           !!document.getElementById('ados-scan-file');
   }
   }
   function setStatus (t) {
   function setStatus (t) {
     var el = document.getElementById('ados-scan-status');
     var el = document.getElementById('ados-scan-status');
     if (el) el.textContent = t || '';
     if (el) el.textContent = t || '';
   }
   }
  function showOCRText (t) {
 
  var el = document.getElementById('ados-scan-ocr');
  if (el) el.textContent = (t || '').trim();
  }
   function setProgress (p) {
   function setProgress (p) {
     var bar = document.getElementById('ados-scan-progress');
     var bar = document.getElementById('ados-scan-progress');
Zeile 77: Zeile 54:
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
     else { bar.hidden = false; bar.value = Math.max(0, Math.min(1, p)); }
   }
   }
   function showPreview (file) {
   function showPreview (file) {
     var url = URL.createObjectURL(file);
     var url = URL.createObjectURL(file);
Zeile 86: Zeile 64:
   }
   }


   // ========== Tesseract bei Bedarf laden ==========
  function showOCRText (t) {
    var el = document.getElementById('ados-scan-ocr');
    if (el) el.textContent = (t || '').trim();
  }
 
   // =============================
  //  Tesseract laden (nur 1x)
  // =============================
 
   var tesseractReady;
   var tesseractReady;
   function ensureTesseract () {
   function ensureTesseract () {
Zeile 109: Zeile 95:
   }
   }


   // ========== Bild-Vorverarbeitung (Canvas) ==========
   // =============================
  //  Vorverarbeitung (OCR)
  //  Graustufen + Unsharp + adaptive Schwelle
  // =============================
 
   async function preprocessImage (file) {
   async function preprocessImage (file) {
     const img = await new Promise((res, rej) => {
     const img = await new Promise((res, rej) => {
Zeile 118: Zeile 108:
     });
     });


    // Längste Kante auf ~1800px skalieren (besser für OCR)
     const MAX = 1800;
     const MAX = 1800;
     const scale = (img.width > img.height) ? (MAX / img.width) : (MAX / img.height);
     const s = Math.min(1, (img.width > img.height) ? MAX / img.width : MAX / img.height);
     const w = Math.round(img.width * scale);
     const w = Math.round(img.width * s), h = Math.round(img.height * s);
     const h = Math.round(img.height * scale);
 
     const c = document.createElement('canvas'); c.width = w; c.height = h;
    const g = c.getContext('2d', { willReadFrequently: true });
    g.imageSmoothingEnabled = true;
    g.drawImage(img, 0, 0, w, h);
 
    // → Graustufen
    let id = g.getImageData(0, 0, w, h), d = id.data;
    for (let i=0;i<d.length;i+=4){
      const y = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
      d[i]=d[i+1]=d[i+2]=y;
    }
    g.putImageData(id, 0, 0);


     const c = document.createElement('canvas');
    // → Unsharp (leichter Hochpass)
     c.width = w; c.height = h;
    id = g.getImageData(0,0,w,h); d = id.data;
    const ctx = c.getContext('2d');
     const copy = new Uint8ClampedArray(d);
    ctx.imageSmoothingEnabled = true;
     const idx = (x,y)=>4*(y*w+x);
     ctx.drawImage(img, 0, 0, w, h);
    for (let y=1;y<h-1;y++){
      for (let x=1;x<w-1;x++){
        const i0=idx(x,y), a=copy[i0], b=copy[idx(x-1,y)], c0=copy[idx(x+1,y)],
              d0=copy[idx(x,y-1)], e=copy[idx(x,y+1)];
        const lap = 4*a - b - c0 - d0 - e;
        const v = Math.max(0, Math.min(255, a + 0.3*lap));
        d[i0]=d[i0+1]=d[i0+2]=v;
      }
    }
     g.putImageData(id,0,0);


     // Graustufen + leichter Kontrastboost
     // → adaptive Schwelle (lokaler Mittelwert)
     const id = ctx.getImageData(0, 0, w, h);
     const win = 25, half = (win|0);
    const d = id.data;
    id = g.getImageData(0,0,w,h); d = id.data;
     for (let i = 0; i < d.length; i += 4) {
     for (let y=0;y<h;y++){
       const g = 0.2126 * d[i] + 0.7152 * d[i + 1] + 0.0722 * d[i + 2];
       for (let x=0;x<w;x++){
      const v = Math.max(0, Math.min(255, (g - 128) * 1.15 + 128));
        let sum=0, cnt=0;
      d[i] = d[i + 1] = d[i + 2] = v;
        for (let yy=Math.max(0,y-half); yy<=Math.min(h-1,y+half); yy+=5){
          for (let xx=Math.max(0,x-half); xx<=Math.min(w-1,x+half); xx+=5){
            sum += d[4*(yy*w+xx)];
            cnt++;
          }
        }
        const thr = (sum/cnt) - 6;
        const i = 4*(y*w+x);
        const v = d[i] < thr ? 0 : 255;
        d[i]=d[i+1]=d[i+2]=v;
      }
     }
     }
     ctx.putImageData(id, 0, 0);
     g.putImageData(id,0,0);
 
    return c;
  }


     return c; // Canvas an Tesseract übergeben
  // Hilfsfunktionen für Varianten
  function crop(canvas, x, y, w, h){
    const c = document.createElement('canvas'); c.width=w; c.height=h;
    c.getContext('2d').drawImage(canvas, x, y, w, h, 0, 0, w, h);
     return c;
  }
  function rotate(canvas, deg){
    const r = document.createElement('canvas');
    const ctx = r.getContext('2d');
    if (deg % 180 === 0){ r.width=canvas.width; r.height=canvas.height; }
    else { r.width=canvas.height; r.height=canvas.width; }
    ctx.translate(r.width/2, r.height/2);
    ctx.rotate(deg*Math.PI/180);
    ctx.drawImage(canvas, -canvas.width/2, -canvas.height/2);
    return r;
   }
   }


   // ========== OCR ==========
   async function ocrOne(canvas, lang) {
async function runOCR (file) {
     const res = await Tesseract.recognize(canvas, lang, {
  await ensureTesseract();
      // Sparse text funktioniert bei Labels (verschieden orientierte Textblöcke)
  setProgress(0);
       tessedit_pageseg_mode: 11,
  const canvas = await preprocessImage(file);
       preserve_interword_spaces: 1
  try {
     const res = await Tesseract.recognize(canvas, 'deu+eng', {
       tessedit_pageseg_mode: 6,
       preserve_interword_spaces: 1,
      logger: m => { if (m?.status==='recognizing text' && typeof m.progress==='number') setProgress(m.progress); }
     });
     });
    return { text: (res?.data?.text||'').trim(), conf: res?.data?.confidence||0 };
  }
  // =============================
  //  Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache
  // =============================
  async function runOCR(file){
    await ensureTesseract();
    setProgress(0.01);
    const base = await preprocessImage(file);
    // Kandidatenflächen
    const variants = [];
    variants.push(base); // komplett
    variants.push(crop(base, 0, 0, Math.round(base.width*0.4), base.height)); // linke Spalte
    variants.push(crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28))); // unteres Banner
    // + Rotationen
    const more = [];
    for (const v of variants){
      more.push(v, rotate(v, 90), rotate(v, -90));
    }
    // zwei Sprachmodi testen
    const results = [];
    for (const canv of more){
      for (const lang of ['deu+eng','eng']){
        try {
          const r = await ocrOne(canv, lang);
          results.push(r);
        } catch(e){ /* einzelne Fehlschläge ignorieren */ }
      }
    }
     setProgress(null);
     setProgress(null);
     return (res && res.data && res.data.text) || '';
 
  } catch (e) {
     results.sort((a,b)=> (b.conf||0)-(a.conf||0));
    // Fallback nur ENG
     return (results[0]?.text)||'';
    console.warn('[LabelScan] deu+eng fehlgeschlagen, versuche eng:', e);
    const res = await Tesseract.recognize(canvas, 'eng', {
      tessedit_pageseg_mode: 6,
      preserve_interword_spaces: 1,
      logger: m => { if (m?.status==='recognizing text' && typeof m.progress==='number') setProgress(m.progress); }
    });
    setProgress(null);
     return (res && res.data && res.data.text) || '';
   }
   }
}


  // =============================
  //  Hinweise aus OCR
  // =============================


  // ========== Hinweise aus OCR extrahieren ==========
   function extractHints (text) {
   function extractHints (text) {
     const raw = String(text || '').replace(/\s+/g, ' ').trim();
     const raw = String(text || '').replace(/\s+/g, ' ').trim();


    // Distillery-/Marken-Token
     const foundNames = [];
     const foundNames = [];
     KNOWN_TOKENS.forEach(t => {
     KNOWN_TOKENS.forEach(t => {
Zeile 181: Zeile 239:
     });
     });


     // Alter
     const ages = [];
     const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     const ageRe = /\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi;
     const ages = []; let m;
     let m;
     while ((m = ageRe.exec(raw)) !== null){ const n = m[1]; if (!ages.includes(n)) ages.push(n); }
     while ((m = ageRe.exec(raw)) !== null) {
      const n = m[1]; if (!ages.includes(n)) ages.push(n);
    }


     // Jahrgänge
     const years = [];
     const yearRe = /\b(19|20)\d{2}\b/g;
     const yearRe = /\b(19|20)\d{2}\b/g;
     const years=[]; while ((m = yearRe.exec(raw)) !== null){ if(!years.includes(m[0])) years.push(m[0]); }
     while ((m = yearRe.exec(raw)) !== null) {
      if (!years.includes(m[0])) years.push(m[0]);
    }


    // ABV
     const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
     const abvRe = /\b(\d{2}(?:[.,]\d)?)\s?%\b/g;
     const uniq = new Set(); let w; const words = [];
     const abvs=[]; while ((m = abvRe.exec(raw)) !== null){ abvs.push(m[1].replace(',','.')); }
    while ((w = wordRe.exec(raw)) !== null) {
      const s = w[0];
      if (!uniq.has(s)) { uniq.add(s); words.push(s); if (words.length >= 8) break; }
    }
 
    return { names: foundNames, ages, years, words, raw };
  }


    // Serien-Keywords (leichtgewichtig)
  // =============================
    const seriesMap = [
  //  Suche (3 Pässe) + Fallbacks
      ['A Dream of Scotland','dream of scotland'],
  // =============================
      ['A Dream of Ireland','dream of ireland'],
      ['A Dream of... – Der Rest der Welt','rest der welt','dream of','rest of the world'],
      ['Friendly Mr. Z Whiskytainment Abfüllungen','friendly mr z','mr z'],
      ['Die Whisky Elfen Abfüllungen','whisky elfen','elfen'],
      ['The Fine Art of Whisky Abfüllungen','fine art of whisky','fine art'],
      ['Alle Rumbastic Abfüllungen','rumbastic']
    ];
    const low = norm(raw); const series=[];
    seriesMap.forEach(([label, hint])=>{
      if (low.includes(norm(hint))) series.push(label);
    });


    // „Promi-Wörter“
  function esc (s) { return mw.html.escape(String(s || '')); }
    const wordRe = /\b[A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{3,}\b/g;
    const uniq = new Set(); let w; const words=[];
    while ((w = wordRe.exec(raw)) !== null){
      const s = w[0]; if(!uniq.has(s)){ uniq.add(s); words.push(s); if(words.length>=8) break; }
    }


     return { names: foundNames, ages, years, words, abvs, series, raw };
  function incatStr () {
     return (ADOS_CATEGORIES || []).map(c => 'incategory:"' + c + '"').join(' ');
   }
   }


  // ========== Smarte Wiki-Suche (3 Pässe) ==========
   async function searchWikiSmart (hints, limit) {
   async function searchWikiSmart (hints, limit) {
     await mw.loader.using('mediawiki.api');
     await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']);
     const api = new mw.Api();
     const api = new mw.Api();
     const ns0 = 0;
     const ns0 = 0;
     const MAX = limit || 12;
     const MAX = limit || 12;
    function incatStr () {
      return ADOS_CATEGORIES.map(c => 'incategory:"' + c + '"').join(' ');
    }
async function broadSearchNoCategory(hints, limit){
  await mw.loader.using('mediawiki.api');
  const api = new mw.Api();
  const ns0 = 0;
  const MAX = limit || 12;
  // baue eine breite Suchphrase aus Distillery/Alter/Jahr/Wörtern
  const parts = []
    .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
    .map(x => `"${x}"`);
  const q = parts.length ? parts.join(' ') : hints.raw.split(/\s+/).slice(0,6).join(' ');
  const r = await api.get({ action:'query', list:'search', srsearch:q, srnamespace:ns0, srlimit:MAX, formatversion:2 });
  return (r.query?.search || []);
}


     // PASS 1: intitle-Kombis (präzise)
     // PASS 1: intitle-Kombis (präzise)
Zeile 285: Zeile 317:
     for (const p of pass3) {
     for (const p of pass3) {
       const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
       const r = await api.get({ action: 'query', list: 'prefixsearch', pssearch: p, psnamespace: ns0, pslimit: MAX });
       (r.query?.prefixsearch || []).forEach(it=>{
       (r.query?.prefixsearch || []).forEach(it => {
         const title = it.title || it['*'];
         const title = it.title || it['*'];
         const k = title; if (seen.has(k)) return; seen.add(k);
         const k = title;
        if (seen.has(k)) return;
        seen.add(k);
         out.push({ title, snippet: '' });
         out.push({ title, snippet: '' });
       });
       });
Zeile 296: Zeile 330:
   }
   }


   // ========== Titel aus Kategorien + Fuzzy-Fallback ==========
   // ganz einfacher Fuzzy-Fallback auf Suchergebnissen
   async function fetchTitlesFromCategories(){
  function scoreTitle(title, hints){
    const t = String(title||'').toLowerCase();
    let s = 0;
    hints.names.forEach(n => { if (t.includes(n.toLowerCase())) s += 1.0; });
    hints.words.forEach(n => { if (t.includes(n.toLowerCase())) s += 0.4; });
    hints.ages.forEach(a => { if (t.includes(String(a))) s += 0.4; });
    hints.years.forEach(y => { if (t.includes(String(y))) s += 0.4; });
    return s;
  }
 
   async function fallbackFuzzyTitles(hints, limit){
     await mw.loader.using('mediawiki.api');
     await mw.loader.using('mediawiki.api');
     const api = new mw.Api();
     const api = new mw.Api();
     const titles = new Set();
     const MAX = limit || 12;


     for (const cat of ADOS_CATEGORIES){
     // Breite Suche mit Tokens (mit/ohne Kategorie)
       let cmcontinue = undefined;
    const q1 = []
      do {
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
        const r = await api.get({
       .map(x => `"${x}"`).join(' ');
          action:'query',
    const q = `${q1} ${incatStr()}`.trim();
          list:'categorymembers',
 
          cmtitle:'Kategorie:' + cat,
    const r = await api.get({ action:'query', list:'search', srsearch:q || hints.raw.split(/\s+/).slice(0,6).join(' '), srlimit:50, formatversion:2 });
          cmtype:'page',
    const items = (r.query?.search || []);
          cmlimit:'max',
    const scored = items.map(it => ({ ...it, _score: scoreTitle(it.title, hints) }));
          cmcontinue
    scored.sort((a,b)=> b._score - a._score);
        });
    const top = scored.slice(0, MAX).filter(x=> x._score >= 0.10); // großzügiger
        (r.query?.categorymembers||[]).forEach(it=> titles.add(it.title));
     return top;
        cmcontinue = r.continue && r.continue.cmcontinue;
      } while (cmcontinue);
    }
     return Array.from(titles);
   }
   }


   function scoreTitleAgainstHints(title, hints){
   async function broadSearchNoCategory(hints, limit){
    const normTitle = norm(title);
     await mw.loader.using('mediawiki.api');
 
     const api = new mw.Api();
    // 1) Token-Overlap
     const MAX = limit || 12;
    const base = overlapScore(title, hints.raw);
 
    // 2) Distillery-Boost
    let nameBoost = 0;
    hints.names.forEach(n=>{
      const t = norm(n);
      const d = levenshtein(normTitle, t);
      if (normTitle.includes(t)) nameBoost = Math.max(nameBoost, 0.35);
      else if (d <= 4) nameBoost = Math.max(nameBoost, 0.2);
     });
 
    // 3) Alter/Jahr/ABV im Titel
    let numBoost = 0;
    hints.ages.forEach(a=>{
      if (new RegExp('\\b'+a+'\\b').test(normTitle)) numBoost = Math.max(numBoost, 0.15);
     });
    hints.years.forEach(y=>{
      if (normTitle.includes(y)) numBoost = Math.max(numBoost, 0.15);
     });
    hints.abvs.forEach(p=>{
      if (normTitle.includes(p.replace('.',''))) numBoost = Math.max(numBoost, 0.1);
    });
 
    // 4) Serien-Bonus
    let seriesBoost = 0;
    hints.series.forEach(s=>{
      const key = norm(s.split(' Abfüllungen')[0]); // Kern
      if (normTitle.includes(key)) seriesBoost = Math.max(seriesBoost, 0.15);
    });


     // 5) leichte Strafe bei sehr kleinem Overlap
     const parts = []
     const penalty = base < 0.2 ? -0.05 : 0;
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
      .map(x => `"${x}"`);
     const q = parts.length ? parts.join(' ') : hints.raw.split(/\s+/).slice(0,6).join(' ');


     return Math.max(0, base + nameBoost + numBoost + seriesBoost + penalty);
     const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:MAX, formatversion:2 });
    return (r.query?.search || []);
   }
   }


   async function fallbackFuzzyTitles(hints, limit){
   // =============================
    const titles = await fetchTitlesFromCategories();
  //  Ergebnisse rendern
    const scored = titles.map(t => ({ title: t, _score: scoreTitleAgainstHints(t, hints) }));
   // =============================
    scored.sort((a,b)=> b._score - a._score);
    const top = scored.slice(0, limit||12).filter(x=> x._score >= 0.10);
    return top.map(x=> ({ title: x.title, snippet: '' }));
   }


  // ========== Treffer-Rendering ==========
   function renderResults (items) {
   function renderResults (items) {
     var box = document.getElementById('ados-scan-results');
     var box = document.getElementById('ados-scan-results');
Zeile 379: Zeile 388:
     items.slice(0, 12).forEach(function (it) {
     items.slice(0, 12).forEach(function (it) {
       var title = it.title || '';
       var title = it.title || '';
       var link = (mw && mw.util) ? mw.util.getUrl(title.replace(/ /g, '_')) : ('/wiki/' + encodeURIComponent(title.replace(/ /g,'_')));
       var link = mw.util.getUrl(title.replace(/ /g, '_'));
       var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       var snip = String(it.snippet || '').replace(/<\/?span[^>]*>/g, '').replace(/&quot;/g, '"');
       var div = document.createElement('div');
       var div = document.createElement('div');
       div.className = 'ados-hit';
       div.className = 'ados-hit';
       div.innerHTML =
       div.innerHTML =
         '<b><a href="' + link + '">' + escHTML(title) + '</a></b>' +
         '<b><a href="' + link + '">' + esc(title) + '</a></b>' +
         (snip ? '<div class="meta">' + snip + '</div>' : '');
         (snip ? '<div class="meta">' + snip + '</div>' : '');
       box.appendChild(div);
       box.appendChild(div);
Zeile 390: Zeile 399:
   }
   }


   // ========== Binding ==========
   // =============================
  //  Binding
  // =============================
 
   var BOUND = false;
   var BOUND = false;
   function bind () {
   function bind () {
Zeile 398: Zeile 410:
     var fileIn = document.getElementById('ados-scan-file');
     var fileIn = document.getElementById('ados-scan-file');
     var bigBtn = document.getElementById('ados-scan-bigbtn');
     var bigBtn = document.getElementById('ados-scan-bigbtn');
     var form   = document.getElementById('ados-scan-form');
     var form = document.getElementById('ados-scan-form');


     if (!runBtn || !fileIn) return;
     if (!runBtn || !fileIn) return;
Zeile 408: Zeile 420:
       if (this.files && this.files[0]) showPreview(this.files[0]);
       if (this.files && this.files[0]) showPreview(this.files[0]);
     });
     });
    if (form) {
      form.addEventListener('submit', function(ev){
        ev.preventDefault();
        runBtn.click();
      });
    }


     runBtn.addEventListener('click', async function (ev) {
     function onSubmit(ev){
       ev.preventDefault();
       ev.preventDefault();
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       if (!(fileIn.files && fileIn.files[0])) { alert('Bitte ein Foto auswählen oder aufnehmen.'); return; }
       var f = fileIn.files[0];
       var f = fileIn.files[0];
       try {
       (async function(){
        runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
        try {
        setStatus('Erkenne Label …');
          runBtn.disabled = true; runBtn.textContent = 'Erkenne …';
        var text = await runOCR(f);
          setStatus('Erkenne Label …');
        showOCRText(text);
          const text = await runOCR(f);
          showOCRText(text);


        setStatus('Suche im Wiki …');
          setStatus('Suche im Wiki …');
        var hints = extractHints(text);
          const hints = extractHints(text);
var hits = await searchWikiSmart(hints, 12);


if (!hits || !hits.length) {
          let hits = await searchWikiSmart(hints, 12);
  setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
          if (!hits || !hits.length) {
  hits = await fallbackFuzzyTitles(hints, 12);
            setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
}
            hits = await fallbackFuzzyTitles(hints, 12);
          }
          if (!hits || !hits.length) {
            setStatus('Kein Treffer – breite Suche ohne Kategorien …');
            hits = await broadSearchNoCategory(hints, 12);
          }


if (!hits || !hits.length) {
          renderResults(hits);
  setStatus('Kein Treffer – breite Suche ohne Kategorien …');
          setStatus('Fertig.');
  hits = await broadSearchNoCategory(hints, 12);
        } catch (e) {
}
          console.error('[LabelScan]', e);
          setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
        } finally {
          runBtn.disabled = false; runBtn.textContent = 'Erkennen & suchen';
        }
      })();
    }


renderResults(hits);
    runBtn.addEventListener('click', onSubmit);
    if (form) form.addEventListener('submit', onSubmit);


        setStatus(hits && hits.length ? 'Fertig.' : 'Keine klaren Treffer.');
     // Sicherheit
      } catch (e) {
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        runBtn.disabled = false; runBtn.textContent = 'Erkennen & suchen';
      }
    });
 
     // Sicherheit gegen Overlays
     var wrap = document.getElementById('ados-labelscan');
     var wrap = document.getElementById('ados-labelscan');
     if (wrap) wrap.style.position = 'relative';
     if (wrap) wrap.style.position = 'relative';
Zeile 458: Zeile 467:
   }
   }


  // Erstbindung + Fallbacks + Observer
   if (document.readyState === 'loading') {
   if (document.readyState === 'loading') {
     document.addEventListener('DOMContentLoaded', bind);
     document.addEventListener('DOMContentLoaded', bind);
Zeile 468: Zeile 476:
   var mo = new MutationObserver(function () { if (!BOUND) bind(); });
   var mo = new MutationObserver(function () { if (!BOUND) bind(); });
   mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
   mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
})();
})();

Version vom 5. November 2025, 23:20 Uhr

/* global mw, Tesseract */
(function () {
  'use strict';

  // =============================
  //   KONFIGURATION
  // =============================

  // ← Leave empty for testing: const ADOS_CATEGORIES = [];
  // List of wiki category names (given without a namespace prefix).
  // NOTE(review): presumably used to restrict wiki searches to these
  // categories (incategory: filters) — confirm against the search code.
  const ADOS_CATEGORIES = [
    'Alle A Dream of Scotland Abfüllungen',
    'Alle A Dream of Ireland Abfüllungen',
    'Alle A Dream of... – Der Rest der Welt Abfüllungen',
    'Friendly Mr. Z Whiskytainment Abfüllungen',
    'Die Whisky Elfen Abfüllungen',
    'The Fine Art of Whisky Abfüllungen',
    'Alle Rumbastic Abfüllungen'
  ];

  // Vocabulary of known label terms, grouped by kind below.
  // NOTE(review): presumably matched against the OCR text when extracting
  // hints — confirm against extractHints.
  const KNOWN_TOKENS = [
    // Series / ranges
    'The Tasteful 8', 'Heroes of Childhood', 'A Dream of Scotland',
    'A Dream of Ireland', 'The Fine Art of Whisky', 'Friendly Mr. Z',
    'Die Whisky Elfen', 'Rumbastic',

    // Distilleries / regions
    'Ardbeg','Ardmore','Arran','Auchroisk','Ben Nevis','Blair Athol','Bowmore',
    'Caol Ila','Clynelish','Glenallachie','Glenrothes','Longmorn','Lagavulin',
    'Tullibardine','Dalmore','Benrinnes','Mortlach','Glenlivet','Inchgower',
    'Islay','Speyside','Highland','Lowland','Campbeltown','Ireland','Irland',

    // Typical label words
    'Cask Strength','First Fill','Bourbon Barrel','Sherry','PX','Oloroso'
  ];

  // =============================
  //   UI-Hilfen
  // =============================

  /**
   * Reports whether the scan widget's two required controls are in the DOM.
   * @returns {boolean} true only when both the run button and the file input exist
   */
  function hasUI () {
    var requiredIds = ['ados-scan-run', 'ados-scan-file'];
    return requiredIds.every(function (id) {
      return Boolean(document.getElementById(id));
    });
  }

  /**
   * Writes a status message into the #ados-scan-status element.
   * Does nothing when that element is absent.
   * @param {string} [t] message to show; any falsy value clears the text
   */
  function setStatus (t) {
    var target = document.getElementById('ados-scan-status');
    if (!target) return;
    target.textContent = t ? t : '';
  }

  /**
   * Updates the #ados-scan-progress element.
   * Does nothing when that element is absent.
   * @param {?number} p fraction done; clamped into [0, 1]. null/undefined
   *   hides the bar and resets its value to 0.
   */
  function setProgress (p) {
    var meter = document.getElementById('ados-scan-progress');
    if (!meter) return;
    var idle = (p === null || p === undefined);
    meter.hidden = idle;
    meter.value = idle ? 0 : Math.max(0, Math.min(1, p));
  }

  /**
   * Shows a thumbnail of the selected image inside #ados-scan-preview.
   *
   * The object URL is only allocated when the preview container exists and
   * is released again once the image has loaded (or failed to load), so
   * repeated file selections do not accumulate blob URLs.
   *
   * @param {Blob} file image file chosen by the user
   */
  function showPreview (file) {
    var prev = document.getElementById('ados-scan-preview');
    if (!prev) return; // nothing to render into → do not allocate a URL

    var url = URL.createObjectURL(file);
    var img = document.createElement('img');
    img.alt = 'Vorschau';
    // An already decoded image keeps displaying after revocation, so the
    // blob URL can be freed as soon as loading has settled either way.
    img.onload = img.onerror = function () { URL.revokeObjectURL(url); };
    img.src = url;

    prev.textContent = ''; // drop whatever preview was shown before
    prev.appendChild(img);
    prev.setAttribute('aria-hidden', 'false');
  }

  /**
   * Displays the recognised text, trimmed, in the #ados-scan-ocr element.
   * Does nothing when that element is absent.
   * @param {string} [t] OCR output; a falsy value shows an empty string
   */
  function showOCRText (t) {
    var out = document.getElementById('ados-scan-ocr');
    if (!out) return;
    var text = t ? t : '';
    out.textContent = text.trim();
  }

  // =============================
  //   Tesseract laden (nur 1x)
  // =============================

  // Cached promise of the (single) Tesseract load; cleared again on failure.
  var tesseractReady;

  /**
   * Makes sure the Tesseract script is loaded, loading it at most once.
   *
   * Tries jsDelivr first and falls back to unpkg. A successful (or still
   * pending) load is cached; a failed load is NOT cached, so a later call
   * starts a fresh attempt instead of replaying the old rejection.
   *
   * @returns {Promise<void>} resolves once the script is available; rejects
   *   with Error('Tesseract konnte nicht geladen werden') if both CDNs fail
   */
  function ensureTesseract () {
    if (tesseractReady) return tesseractReady;

    var sources = [
      'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js',
      'https://unpkg.com/tesseract.js@5/dist/tesseract.min.js'
    ];

    // Injects one <script> tag and settles when it has loaded or failed.
    function loadScript (src) {
      return new Promise(function (resolve, reject) {
        var s = document.createElement('script');
        s.src = src;
        s.async = true;
        s.onload = function () { resolve(); };
        s.onerror = function () { reject(new Error('script load failed: ' + src)); };
        document.head.appendChild(s);
      });
    }

    var loading = window.Tesseract
      ? Promise.resolve()
      : loadScript(sources[0]).catch(function () { return loadScript(sources[1]); });

    var attempt = loading.catch(function () {
      // Forget the failed attempt so that the user can actually retry.
      if (tesseractReady === attempt) tesseractReady = undefined;
      throw new Error('Tesseract konnte nicht geladen werden');
    });

    tesseractReady = attempt;
    return attempt;
  }

  // =============================
  //   Vorverarbeitung (OCR)
  //   Graustufen + Unsharp + adaptive Schwelle
  // =============================

  /**
   * Decodes an image file and prepares it for OCR on a canvas:
   * downscale (longest edge at most 1800px, never upscaled) → grayscale →
   * mild sharpening → adaptive threshold to pure black/white.
   *
   * Changes compared with the previous implementation:
   *  - the temporary blob URL is revoked once decoding has settled;
   *  - the threshold samples an unmodified snapshot of the sharpened image
   *    rather than the buffer it is writing to, so pixels that were already
   *    binarised no longer distort the local mean of later pixels;
   *  - all passes share a single getImageData/putImageData pair.
   *
   * @param {Blob} file image file to process
   * @returns {Promise<HTMLCanvasElement>} canvas holding the binarised image
   * @throws {Error} if the image cannot be decoded
   */
  async function preprocessImage (file) {
    const url = URL.createObjectURL(file);
    let img;
    try {
      img = await new Promise((res, rej) => {
        const o = new Image();
        o.onload = () => res(o);
        o.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
        o.src = url;
      });
    } finally {
      // The decoded image no longer needs the URL.
      URL.revokeObjectURL(url);
    }

    const MAX = 1800;
    const s = Math.min(1, (img.width > img.height) ? MAX / img.width : MAX / img.height);
    const w = Math.round(img.width * s), h = Math.round(img.height * s);

    const c = document.createElement('canvas'); c.width = w; c.height = h;
    const g = c.getContext('2d', { willReadFrequently: true });
    g.imageSmoothingEnabled = true;
    g.drawImage(img, 0, 0, w, h);

    const id = g.getImageData(0, 0, w, h);
    const d = id.data;

    // → Grayscale (Rec. 709 luma weights), written to all three channels
    for (let i = 0; i < d.length; i += 4) {
      const y = 0.2126*d[i] + 0.7152*d[i+1] + 0.0722*d[i+2];
      d[i] = d[i+1] = d[i+2] = y;
    }

    // → Sharpen: add 0.3 × 4-neighbour Laplacian; border pixels stay as-is.
    // Reads come from `src` so that writes into `d` cannot feed back.
    const src = new Uint8ClampedArray(d);
    const idx = (x, y) => 4*(y*w + x);
    for (let y = 1; y < h-1; y++) {
      for (let x = 1; x < w-1; x++) {
        const i0 = idx(x, y);
        const a = src[i0];
        const lap = 4*a - src[idx(x-1, y)] - src[idx(x+1, y)] - src[idx(x, y-1)] - src[idx(x, y+1)];
        const v = Math.max(0, Math.min(255, a + 0.3*lap));
        d[i0] = d[i0+1] = d[i0+2] = v;
      }
    }

    // → Adaptive threshold against the local mean (sampled every 5th pixel).
    // NOTE(review): the previous code called this value `half` of a 25px
    // window but used the full 25 as radius; that effective radius is kept.
    const radius = 25, step = 5, bias = 6;
    src.set(d); // snapshot of the sharpened image to sample from
    for (let y = 0; y < h; y++) {
      const y0 = Math.max(0, y - radius), y1 = Math.min(h-1, y + radius);
      for (let x = 0; x < w; x++) {
        const x0 = Math.max(0, x - radius), x1 = Math.min(w-1, x + radius);
        let sum = 0, cnt = 0;
        for (let yy = y0; yy <= y1; yy += step) {
          for (let xx = x0; xx <= x1; xx += step) {
            sum += src[4*(yy*w + xx)];
            cnt++;
          }
        }
        const i = 4*(y*w + x);
        const v = src[i] < (sum/cnt) - bias ? 0 : 255;
        d[i] = d[i+1] = d[i+2] = v;
      }
    }
    g.putImageData(id, 0, 0);

    return c;
  }

  // Hilfsfunktionen für Varianten
  /**
   * Copies a rectangular region of a canvas into a new canvas of that size.
   * @param {HTMLCanvasElement} canvas source canvas
   * @param {number} x left edge of the region in the source
   * @param {number} y top edge of the region in the source
   * @param {number} w region width (also the width of the result)
   * @param {number} h region height (also the height of the result)
   * @returns {HTMLCanvasElement} new canvas containing the region
   */
  function crop(canvas, x, y, w, h){
    const out = document.createElement('canvas');
    out.width = w;
    out.height = h;
    const pen = out.getContext('2d');
    pen.drawImage(canvas, x, y, w, h, 0, 0, w, h);
    return out;
  }
  /**
   * Draw a canvas rotated around its centre into a new canvas.
   *
   * The output keeps the source dimensions when `deg` is a multiple of 180
   * and swaps width/height otherwise.
   *
   * @param {HTMLCanvasElement} canvas - Source canvas (its width/height are read).
   * @param {number} deg - Rotation angle in degrees.
   * @returns {HTMLCanvasElement} Newly created canvas with the rotated image.
   */
  function rotate(canvas, deg){
    const out = document.createElement('canvas');
    const ctx = out.getContext('2d');

    const keepsOrientation = deg % 180 === 0;
    out.width = keepsOrientation ? canvas.width : canvas.height;
    out.height = keepsOrientation ? canvas.height : canvas.width;

    // Rotate about the centre of the output, then draw the source centred on it.
    const radians = deg*Math.PI/180;
    ctx.translate(out.width/2, out.height/2);
    ctx.rotate(radians);
    ctx.drawImage(canvas, -canvas.width/2, -canvas.height/2);
    return out;
  }

  /**
   * Run a single Tesseract recognition pass on one canvas.
   *
   * @param {HTMLCanvasElement} canvas - Image to recognise.
   * @param {string} lang - Language spec passed through to Tesseract (e.g. 'deu+eng').
   * @returns {Promise<{text: string, conf: number}>} Trimmed text and confidence;
   *   '' and 0 when the result carries no such data.
   */
  async function ocrOne(canvas, lang) {
    const options = {
      // Sparse-text mode works for labels (text blocks in different orientations).
      tessedit_pageseg_mode: 11,
      preserve_interword_spaces: 1
    };
    const outcome = await Tesseract.recognize(canvas, lang, options);
    const data = outcome?.data;
    const text = (data?.text || '').trim();
    const conf = data?.confidence || 0;
    return { text, conf };
  }

  // =============================
  //   Mehrfach-OCR (Rotationen/Regionen) + Fallback-Sprache
  // =============================

  /**
   * OCR a label photo using several regions, rotations and language modes,
   * and return the text of the most confident non-empty result.
   *
   * Variants: the full preprocessed image, its left 40 % column and its
   * bottom 28 % banner, each unrotated and rotated by +90 and -90 degrees.
   * Every variant is recognised with 'deu+eng' and with 'eng'.
   *
   * @param {File|Blob} file - Image handed to preprocessImage.
   * @returns {Promise<string>} Best recognised text, or '' if nothing was read.
   * @throws Whatever ensureTesseract or preprocessImage rejects with; failures
   *   of individual OCR passes are logged and skipped.
   */
  async function runOCR(file){
    await ensureTesseract();
    setProgress(0.01);

    const results = [];
    try {
      const base = await preprocessImage(file);

      // Candidate regions
      const regions = [
        base, // whole image
        crop(base, 0, 0, Math.round(base.width*0.4), base.height), // left column
        crop(base, 0, Math.round(base.height*0.72), base.width, Math.round(base.height*0.28)) // bottom banner
      ];

      // + rotations
      const candidates = [];
      for (const region of regions){
        candidates.push(region, rotate(region, 90), rotate(region, -90));
      }

      // Try two language modes per candidate
      for (const canv of candidates){
        for (const lang of ['deu+eng','eng']){
          try {
            results.push(await ocrOne(canv, lang));
          } catch (e) {
            // A single failed pass must not abort the scan, but should be visible.
            console.warn('[LabelScan] OCR pass failed (' + lang + ')', e);
          }
        }
      }
    } finally {
      // Always clear the progress indicator, even when preprocessing throws.
      setProgress(null);
    }

    // An empty result is useless for the search, whatever its confidence.
    const usable = results.filter(r => r && r.text);
    usable.sort((a,b)=> (b.conf||0)-(a.conf||0));
    return usable.length ? usable[0].text : '';
  }

  // =============================
  //   Hinweise aus OCR
  // =============================

  /**
   * Derive search hints from raw OCR text.
   *
   * @param {string} text - OCR output; whitespace runs are collapsed first.
   * @returns {{names: string[], ages: string[], years: string[], words: string[], raw: string}}
   *   names: entries of KNOWN_TOKENS found as whole words (case-insensitive);
   *   ages:  distinct 1-2 digit numbers followed by years/yo/Jahr(e);
   *   years: distinct four-digit numbers starting with 19 or 20;
   *   words: up to 8 distinct capitalised words of at least 4 letters;
   *   raw:   the normalised input text.
   */
  function extractHints (text) {
    const raw = String(text || '').replace(/\s+/g, ' ').trim();

    const names = KNOWN_TOKENS.filter(token => {
      const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return new RegExp('\\b' + escaped + '\\b', 'i').test(raw);
    });

    const ages = [];
    for (const m of raw.matchAll(/\b([1-9]\d?)\s?(?:years?|yo|jahr(?:e)?)\b/gi)) {
      if (!ages.includes(m[1])) ages.push(m[1]);
    }

    const years = [];
    for (const m of raw.matchAll(/\b(19|20)\d{2}\b/g)) {
      if (!years.includes(m[0])) years.push(m[0]);
    }

    // `\b` only knows ASCII word characters, so it breaks on words that start
    // or end with an umlaut/ß (e.g. "Österreich"). Use explicit boundaries
    // instead: group 1 is the preceding non-letter (or start of text), group 2
    // is the word, which must end in a letter and not be followed by one.
    const wordRe = /(^|[^\wÄÖÜäöüß])([A-ZÄÖÜ][A-Za-zÄÖÜäöüß\-]{2,}[A-Za-zÄÖÜäöüß])(?![\wÄÖÜäöüß])/g;
    const uniq = new Set();
    const words = [];
    for (const m of raw.matchAll(wordRe)) {
      const word = m[2];
      if (uniq.has(word)) continue;
      uniq.add(word);
      words.push(word);
      if (words.length >= 8) break;
    }

    return { names, ages, years, words, raw };
  }

  // =============================
  //   Suche (3 Pässe) + Fallbacks
  // =============================

  /**
   * HTML-escape a value via mw.html.escape; falsy values are treated as ''.
   *
   * @param {*} s - Value to escape (stringified first).
   * @returns {string} Escaped text.
   */
  function esc (s) {
    const text = s ? String(s) : '';
    return mw.html.escape(text);
  }

  /**
   * Build the category filter appended to search queries.
   *
   * Separate `incategory:` terms are combined with AND by CirrusSearch, which
   * would require a page to be in every listed category at once. A single
   * `incategory:"A|B|C"` term matches pages in any of the categories.
   *
   * @returns {string} `incategory:"A|B|…"`, or '' when ADOS_CATEGORIES is empty.
   */
  function incatStr () {
    const cats = (ADOS_CATEGORIES || [])
      // A double quote would terminate the quoted search term early.
      .map(c => String(c || '').replace(/"/g, '').trim())
      .filter(Boolean);
    return cats.length ? 'incategory:"' + cats.join('|') + '"' : '';
  }

  /**
   * Search the wiki in three passes and return de-duplicated hits.
   *
   * Pass 1: `intitle:` combinations of each name with each age / year.
   * Pass 2: one full-text query built from quoted names, age, year and words.
   * Pass 3: title prefix search on the first name (or first word if no name).
   * Passes 1 and 2 carry the category filter from incatStr(); the function
   * returns as soon as `limit` distinct titles have been collected.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
   * @param {number} [limit] - Maximum number of hits; 12 when falsy.
   * @returns {Promise<Array<{title: string, snippet?: string}>>} Hits in discovery order.
   */
  async function searchWikiSmart (hints, limit) {
    await mw.loader.using(['mediawiki.api','mediawiki.util','mediawiki.html']);
    const api = new mw.Api();
    const mainNs = 0;
    const cap = limit || 12;
    const { names, ages, years, words } = hints;
    const cats = incatStr();

    // PASS 1: intitle combinations (precise)
    const titleQueries = names.flatMap(n => [
      ...ages.map(a => `intitle:"${n}" intitle:${a} ${cats}`),
      ...years.map(y => `intitle:"${n}" "${y}" ${cats}`),
      `intitle:"${n}" ${cats}`
    ]);

    // PASS 2: weighted full-text search
    const phrase = [
      ...names.slice(0, 2),
      ...ages.slice(0, 1),
      ...years.slice(0, 1),
      ...words.slice(0, 3)
    ].map(x => `"${x}"`).join(' ');
    const fullTextQueries = phrase ? [ `${phrase} ${cats}` ] : [];

    // PASS 3: prefix on title
    const prefixes = [];
    if (names.length) {
      prefixes.push(names[0]);
    } else if (words.length) {
      prefixes.push(words[0]);
    }

    const seen = new Set();
    const out = [];
    const collect = (title, item) => {
      if (seen.has(title)) return;
      seen.add(title);
      out.push(item);
    };

    for (const q of [...titleQueries, ...fullTextQueries]) {
      const reply = await api.get({
        action: 'query',
        list: 'search',
        srsearch: q,
        srnamespace: mainNs,
        srlimit: cap,
        formatversion: 2
      });
      for (const hit of (reply.query?.search || [])) collect(hit.title, hit);
      if (out.length >= cap) return out.slice(0, cap);
    }

    // Prefix (list=prefixsearch)
    for (const p of prefixes) {
      const reply = await api.get({
        action: 'query',
        list: 'prefixsearch',
        pssearch: p,
        psnamespace: mainNs,
        pslimit: cap
      });
      for (const hit of (reply.query?.prefixsearch || [])) {
        const title = hit.title || hit['*'];
        collect(title, { title, snippet: '' });
      }
      if (out.length >= cap) break;
    }

    return out.slice(0, cap);
  }

  // ganz einfacher Fuzzy-Fallback auf Suchergebnissen
  /**
   * Score how well a page title matches the OCR hints.
   *
   * Each name contained in the title adds 1.0; each word, age and year adds
   * 0.4. Names and words are matched case-insensitively as substrings. Ages
   * and years must appear as complete numbers, so that age "10" does not
   * score on a title that merely contains "2010".
   *
   * @param {string} title - Page title to score.
   * @param {{names: string[], words: string[], ages: string[], years: string[]}} hints
   * @returns {number} Accumulated score (0 when nothing matches).
   */
  function scoreTitle(title, hints){
    const t = String(title||'').toLowerCase();

    const hasText = s => t.includes(String(s).toLowerCase());
    const hasNumber = n => {
      const digits = String(n);
      // Non-numeric input keeps the plain substring check.
      if (!/^\d+$/.test(digits)) return t.includes(digits);
      return new RegExp('(?:^|\\D)' + digits + '(?!\\d)').test(t);
    };

    let s = 0;
    (hints.names || []).forEach(n => { if (hasText(n)) s += 1.0; });
    (hints.words || []).forEach(w => { if (hasText(w)) s += 0.4; });
    (hints.ages || []).forEach(a => { if (hasNumber(a)) s += 0.4; });
    (hints.years || []).forEach(y => { if (hasNumber(y)) s += 0.4; });
    return s;
  }

  /**
   * Fallback: run one broad category-filtered search (up to 50 results) and
   * re-rank the results by scoreTitle().
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[]}} hints
   * @param {number} [limit] - Maximum number of hits; 12 when falsy.
   * @returns {Promise<Array<Object>>} Search results with an added `_score`,
   *   best first, keeping only those scoring at least 0.10.
   */
  async function fallbackFuzzyTitles(hints, limit){
    const MAX = limit || 12;

    const tokens = []
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3));

    // Without any hint every candidate scores 0 and would be filtered out
    // below, so skip the request. This also avoids sending an empty
    // `srsearch`, which the API rejects.
    if (!tokens.length) return [];

    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();

    // Broad search with quoted tokens plus the category filter
    const quoted = tokens.map(x => `"${x}"`).join(' ');
    const q = `${quoted} ${incatStr()}`.trim();

    const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:50, formatversion:2 });
    const items = (r.query?.search || []);
    const scored = items.map(it => ({ ...it, _score: scoreTitle(it.title, hints) }));
    scored.sort((a,b)=> b._score - a._score);
    // Deliberately generous threshold
    return scored.slice(0, MAX).filter(x=> x._score >= 0.10);
  }

  /**
   * Last-resort full-text search without any category filter.
   *
   * Uses the quoted hint tokens, or the first six words of the raw OCR text
   * when there are no tokens.
   *
   * @param {{names: string[], ages: string[], years: string[], words: string[], raw: string}} hints
   * @param {number} [limit] - Maximum number of hits; 12 when falsy.
   * @returns {Promise<Array<Object>>} Raw search results; [] when there is
   *   nothing to search for.
   */
  async function broadSearchNoCategory(hints, limit){
    const MAX = limit || 12;

    const parts = []
      .concat(hints.names.slice(0,2), hints.ages.slice(0,1), hints.years.slice(0,1), hints.words.slice(0,3))
      .map(x => `"${x}"`);
    const q = parts.length
      ? parts.join(' ')
      : String(hints.raw || '').split(/\s+/).slice(0,6).join(' ');

    // Empty OCR text yields an empty query; the API rejects an empty
    // `srsearch`, which would surface as a generic error instead of "no hits".
    if (!q.trim()) return [];

    await mw.loader.using('mediawiki.api');
    const api = new mw.Api();
    const r = await api.get({ action:'query', list:'search', srsearch:q, srlimit:MAX, formatversion:2 });
    return (r.query?.search || []);
  }

  // =============================
  //   Ergebnisse rendern
  // =============================

  /**
   * Render up to 12 search hits into the #ados-scan-results element.
   *
   * Does nothing when that element is missing. Shows a "no clear hits" notice
   * when `items` is empty. Each hit becomes a linked title plus, if present,
   * its snippet with <span> tags stripped and &quot; decoded.
   * NOTE(review): the snippet is inserted as HTML, so this relies on the
   * search API returning already-escaped snippet markup - confirm.
   *
   * @param {Array<{title: string, snippet?: string}>} items - Hits to display.
   */
  function renderResults (items) {
    const box = document.getElementById('ados-scan-results');
    if (!box) return;

    box.innerHTML = '';
    if (!items || !items.length) {
      box.innerHTML = '<div class="ados-hit">Keine klaren Treffer. Bitte anderes Foto oder manuell suchen.</div>';
      return;
    }

    for (const hit of items.slice(0, 12)) {
      const title = hit.title || '';
      const href = mw.util.getUrl(title.replace(/ /g, '_'));
      const snippet = String(hit.snippet || '')
        .replace(/<\/?span[^>]*>/g, '')
        .replace(/&quot;/g, '"');
      const meta = snippet ? '<div class="meta">' + snippet + '</div>' : '';

      const row = document.createElement('div');
      row.className = 'ados-hit';
      row.innerHTML = '<b><a href="' + href + '">' + esc(title) + '</a></b>' + meta;
      box.appendChild(row);
    }
  }

  // =============================
  //   Binding
  // =============================

  // Set once the scan UI has been wired up; bind() is a no-op afterwards.
  var BOUND = false;

  /**
   * Attach the event handlers of the label-scan UI, at most once.
   *
   * Returns without doing anything when already bound, when hasUI() is false,
   * when the run button or file input is missing, or when the run button is
   * already marked with data-bound="1".
   */
  function bind () {
    if (BOUND) return;
    if (!hasUI()) return;

    const byId = (id) => document.getElementById(id);
    const runBtn = byId('ados-scan-run');
    const fileIn = byId('ados-scan-file');
    const bigBtn = byId('ados-scan-bigbtn');
    const form = byId('ados-scan-form');

    if (!runBtn || !fileIn) return;
    if (runBtn.dataset.bound === '1') return;
    runBtn.dataset.bound = '1';
    BOUND = true;

    // The big button only forwards to the file input.
    if (bigBtn) {
      bigBtn.addEventListener('click', () => { fileIn.click(); });
    }
    fileIn.addEventListener('change', () => {
      const picked = fileIn.files && fileIn.files[0];
      if (picked) showPreview(picked);
    });

    const noHits = (list) => !list || !list.length;

    // OCR the photo, then search with increasingly broad strategies.
    async function scanAndSearch (photo) {
      try {
        runBtn.disabled = true;
        runBtn.textContent = 'Erkenne …';
        setStatus('Erkenne Label …');
        const text = await runOCR(photo);
        showOCRText(text);

        setStatus('Suche im Wiki …');
        const hints = extractHints(text);

        let hits = await searchWikiSmart(hints, 12);
        if (noHits(hits)) {
          setStatus('Kein direkter Treffer – Fuzzy über Kategorien …');
          hits = await fallbackFuzzyTitles(hints, 12);
        }
        if (noHits(hits)) {
          setStatus('Kein Treffer – breite Suche ohne Kategorien …');
          hits = await broadSearchNoCategory(hints, 12);
        }

        renderResults(hits);
        setStatus('Fertig.');
      } catch (e) {
        console.error('[LabelScan]', e);
        setStatus('Fehler bei Erkennung/Suche. Bitte erneut versuchen.');
      } finally {
        // Re-enable the button whether the scan succeeded or failed.
        runBtn.disabled = false;
        runBtn.textContent = 'Erkennen & suchen';
      }
    }

    // Shared handler for the button click and the form submit.
    function onSubmit (ev) {
      ev.preventDefault();
      const photo = fileIn.files && fileIn.files[0];
      if (!photo) {
        alert('Bitte ein Foto auswählen oder aufnehmen.');
        return;
      }
      scanAndSearch(photo);
    }

    runBtn.addEventListener('click', onSubmit);
    if (form) form.addEventListener('submit', onSubmit);

    // Defensive styling: presumably keeps the button clickable above
    // overlapping elements - TODO confirm.
    const wrap = byId('ados-labelscan');
    if (wrap) wrap.style.position = 'relative';
    Object.assign(runBtn.style, {
      position: 'relative',
      zIndex: '9999',
      pointerEvents: 'auto'
    });
  }

  // Bootstrap: try to bind now (or on DOMContentLoaded), retry twice on a
  // timer, and watch the DOM in case the scan UI is inserted later.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', bind);
  } else {
    bind();
  }
  setTimeout(bind, 250);
  setTimeout(bind, 1000);
  var mo = new MutationObserver(function () {
    if (!BOUND) bind();
    // Once bound, bind() is a no-op, so stop paying for every DOM mutation
    // on the page.
    if (BOUND) mo.disconnect();
  });
  // Nothing to watch for if the synchronous bind() above already succeeded.
  if (!BOUND) {
    mo.observe(document.documentElement || document.body, { childList: true, subtree: true });
  }

})();