diff --git a/lib/food_ingredient_parser/loose/scanner.rb b/lib/food_ingredient_parser/loose/scanner.rb
index 7b4bdcc..a2d30c3 100644
--- a/lib/food_ingredient_parser/loose/scanner.rb
+++ b/lib/food_ingredient_parser/loose/scanner.rb
@@ -5,7 +5,8 @@ class Scanner
 
     SEP_CHARS = "|;,.".freeze
     MARK_CHARS = "¹²³⁴⁵ᵃᵇᶜᵈᵉᶠᵍªº⁽⁾†‡⁺•°▪◊#^˄*~".freeze
-    PREFIX_RE = /\A\s*(ingredients|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
+    # Keep in sync with +root_prefix+ in the strict +Root+ grammar.
+    PREFIX_RE = /\A\s*(ingredients(\s*list)?|ingredienser|contains|ingred[iï][eë]nt(en)?(declaratie)?|bevat|dit zit er\s?in|samenstelling|zutaten)\b\s*[:;.]?\s*/i.freeze
     NOTE_RE = /\A\b(dit product kan\b|deze verpakking kan\b|kan sporen\b.*?\bbevatten\b|voor allergenen\b|allergenen\b|allergie[- ]informatie(\s*:|\b)|E\s*=|gemaakt in\b|geproduceerd in\b|bevat mogelijk\b|kijk voor meer\b|allergie-info|in de fabriek\b|in dit bedrijf\b|voor [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bis [0-9,.]+ (g\.?|gr\.?|ram|ml).*\bgebruikt\b)/i.freeze
     # Keep in sync with +abbrev+ in the +Common+ grammar, plus relevant ones from the +Amount+ grammar.
     ABBREV_RE = Regexp.union(
diff --git a/lib/food_ingredient_parser/strict/grammar/root.treetop b/lib/food_ingredient_parser/strict/grammar/root.treetop
index a472297..64004ab 100644
--- a/lib/food_ingredient_parser/strict/grammar/root.treetop
+++ b/lib/food_ingredient_parser/strict/grammar/root.treetop
@@ -19,9 +19,11 @@ module FoodIngredientParser::Strict::Grammar
     rule root_prefix
       (
-        'ingredients'i / 'contains'i /
+        'ingredients'i ( ws+ 'list'i )? / 'contains'i /
+        # Must precede the generic 'ingredien...' alternative: PEG choice is ordered, and that one already matches the "ingredien" prefix.
+        'ingredienser'i /
         ('ingred'i [IÏiï] [EËeë] 'n'i ( 't'i 'en'i? 'declaratie'i? )? ) /
         'bevat'i / 'dit zit er in'i / 'samenstelling'i /
         'zutaten'i
       )
       ( ws* [:;.] ( ws* newline )? / ws* newline / ws ) ws* # optional colon or other separator
       "'"? ws* # stray quote occurs sometimes