Skip to content

Commit

Permalink
fix more search edge-cases
Browse files Browse the repository at this point in the history
  • Loading branch information
shish committed Dec 14, 2023
1 parent e114057 commit afaf7bb
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 35 deletions.
84 changes: 55 additions & 29 deletions core/imageboard/image.php
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ private static function find_images_internal(int $start = 0, ?int $limit = null,
}
}

$querylet = Image::build_search_querylet($tags, $limit, $start);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order, $limit, $start);
return $database->get_all_iterable($querylet->sql, $querylet->variables);
}

Expand Down Expand Up @@ -246,7 +247,8 @@ public static function count_images(array $tags = []): int
if (Extension::is_enabled(RatingsInfo::KEY)) {
$tags[] = "rating:*";
}
$querylet = Image::build_search_querylet($tags);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$total = (int)$database->get_one("SELECT COUNT(*) AS cnt FROM ($querylet->sql) AS tbl", $querylet->variables);
if (SPEED_HAX && $total > 5000) {
// when we have a ton of images, the count
Expand Down Expand Up @@ -307,7 +309,8 @@ public function get_next(array $tags = [], bool $next = true): ?Image
} else {
$tags[] = 'id'. $gtlt . $this->id;
$tags[] = 'order:id_'. strtolower($dir);
$querylet = Image::build_search_querylet($tags);
[$tag_conditions, $img_conditions, $order] = self::terms_to_conditions($tags);
$querylet = self::build_search_querylet($tag_conditions, $img_conditions, $order);
$querylet->append_sql(' LIMIT 1');
$row = $database->get_row($querylet->sql, $querylet->variables);
}
Expand Down Expand Up @@ -749,13 +752,13 @@ private static function tag_or_wildcard_to_ids(string $tag): array
}

/**
* Turn a human input string into a an abstract search query
*
* @param string[] $terms
* @return array{0: TagCondition[], 1: ImgCondition[], 2: string}
*/
private static function build_search_querylet(
array $terms,
?int $limit = null,
?int $offset = null
): Querylet {
private static function terms_to_conditions(array $terms): array
{
global $config;

$tag_conditions = [];
Expand All @@ -776,18 +779,30 @@ private static function build_search_querylet(

$order = ($order ?: "images.".$config->get_string(IndexConfig::ORDER));

/*
* Turn a bunch of Querylet objects into a base query
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*/
return [$tag_conditions, $img_conditions, $order];
}

/**
* Turn an abstract search query into an SQL Querylet
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*
* @param TagCondition[] $tag_conditions
* @param ImgCondition[] $img_conditions
*/
private static function build_search_querylet(
array $tag_conditions,
array $img_conditions,
string $order,
?int $limit = null,
?int $offset = null
): Querylet {
// no tags, do a simple search
if (count($tag_conditions) === 0) {
$query = new Querylet("SELECT images.* FROM images WHERE 1=1");
Expand All @@ -796,15 +811,28 @@ private static function build_search_querylet(
// one tag sorted by ID - we can fetch this from the image_tags table,
// and do the offset / limit there, which is 10x faster than fetching
// all the image_tags and doing the offset / limit on the result.
//
// NOTE: this is currently impossible to test, because the test suite
// loads all extensions, some of whom add generic img_conditions onto
// the search, which prevents this optimisation from being used.
elseif (
count($tag_conditions) === 1
&& $tag_conditions[0]->positive
// We can only do this if img_conditions is empty, because
// we're going to apply the offset / limit to the image_tags
// subquery, and applying extra conditions to the top-level
// query might reduce the total results below the target limit
&& empty($img_conditions)
// We can only do this if we're sorting by ID, because
// we're going to be using the image_tags table, which
// only has image_id and tag_id, not any other columns
&& ($order == "id DESC" || $order == "images.id DESC")
&& !is_null($offset)
// This is only an optimisation if we are applying limit
// and offset
&& !is_null($limit)
&& !is_null($offset)
) {
$tc = $tag_conditions[0];
$in = $tc->positive ? "IN" : "NOT IN";
// IN (SELECT id FROM tags) is 100x slower than doing a separate
// query and then a second query for IN(first_query_results)??
$tag_array = self::tag_or_wildcard_to_ids($tc->tag);
Expand All @@ -820,17 +848,16 @@ private static function build_search_querylet(
$query = new Querylet("
SELECT images.*
FROM images INNER JOIN (
SELECT it.image_id
SELECT DISTINCT it.image_id
FROM image_tags it
WHERE it.tag_id $in ($set)
WHERE it.tag_id IN ($set)
ORDER BY it.image_id DESC
LIMIT :limit OFFSET :offset
) a on a.image_id = images.id
ORDER BY images.id DESC
WHERE 1=1
", ["limit" => $limit, "offset" => $offset]);
// don't offset at the image level because
// we already offset at the image_tags level
$order = null;
$limit = null;
$offset = null;
}
Expand Down Expand Up @@ -890,7 +917,7 @@ private static function build_search_querylet(
}

$first = array_shift($inner_joins);
$sub_query = "SELECT it.image_id FROM image_tags it ";
$sub_query = "SELECT DISTINCT it.image_id FROM image_tags it ";
$i = 0;
foreach ($inner_joins as $inner_join) {
$i++;
Expand Down Expand Up @@ -946,9 +973,8 @@ private static function build_search_querylet(
$query->append(new Querylet($img_sql, $img_vars));
}

if (!is_null($order)) {
$query->append(new Querylet(" ORDER BY ".$order));
}
$query->append(new Querylet(" ORDER BY ".$order));

if (!is_null($limit)) {
$query->append(new Querylet(" LIMIT :limit ", ["limit" => $limit]));
$query->append(new Querylet(" OFFSET :offset ", ["offset" => $offset]));
Expand Down
26 changes: 20 additions & 6 deletions ext/index/test.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,21 @@ public function testWildSearchOneResult($image_ids)
}

#[Depends('testUpload')]
public function testWildSearchManyResults($image_ids)
public function testWildSearchManyResultsSimple($image_ids)
{
$image_ids = $this->testUpload();
// two images match comp* - one matches it once,
// one matches it twice
// two images match comp* - one matches it once, one matches it twice
$this->assert_search_results(["comp*"], [$image_ids[1], $image_ids[0]]);
}

#[Depends('testUpload')]
public function testWildSearchManyResultsComplex($image_ids)
{
$image_ids = $this->testUpload();
// same thing, but with the complex branch
$this->assert_search_results(["comp*", "-asdf"], [$image_ids[1], $image_ids[0]]);
}

/* * * * * * * * * * *
* Mixed *
* * * * * * * * * * */
Expand All @@ -196,20 +203,27 @@ public function testMixedSearchTagMeta($image_ids)
// multiple tags, many results
$this->assert_search_results(["computer", "size=640x480"], [$image_ids[1], $image_ids[0]]);
}
// tag + negative
// wildcards + ???

/* * * * * * * * * * *
* Negative *
* * * * * * * * * * */
#[Depends('testUpload')]
public function testNegative($image_ids)
public function testSubtractFromSearch($image_ids)
{
$image_ids = $this->testUpload();

// negative tag, should have one result
$this->assert_search_results(["computer", "-pbx"], [$image_ids[1]]);

// removing something that doesn't exist should have no effect
$this->assert_search_results(["computer", "-not_a_tag"], [$image_ids[1], $image_ids[0]]);
}

#[Depends('testUpload')]
public function testSubtractFromDefault($image_ids)
{
$image_ids = $this->testUpload();

// negative tag alone, should work
$this->assert_search_results(["-pbx"], [$image_ids[1]]);

Expand Down

0 comments on commit afaf7bb

Please sign in to comment.