Skip to content

Commit

Permalink
[NFT Metadata Crawler] Increase URI Parser coverage for redirected IP…
Browse files Browse the repository at this point in the history
…FS public gateway URIs (aptos-labs#10743)

* additional coverage

* comments
  • Loading branch information
just-in-chang authored Oct 31, 2023
1 parent aee3069 commit ee2a636
Showing 1 changed file with 29 additions and 14 deletions.
43 changes: 29 additions & 14 deletions ecosystem/nft-metadata-crawler-parser/src/utils/uri_parser.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright © Aptos Foundation

use crate::utils::counters::{PARSE_URI_INVOCATION_COUNT, PARSE_URI_TYPE_COUNT};
use regex::Regex;
use regex::{Captures, Regex};
use url::Url;

pub struct URIParser;
Expand All @@ -25,24 +25,34 @@ impl URIParser {
// Expects the following format for provided URIs `ipfs/{CID}/{path}`
let re = Regex::new(r"^(ipfs/)(?P<cid>[a-zA-Z0-9]+)(?P<path>/.*)?$")?;

// Expects the redirected gateway format `https://{CID}.ipfs.{gateway-host}/{path}`, where the host after `.ipfs.` may be any domain and `/{path}` is optional
let redir_re = Regex::new(r"https:\/\/(?P<cid>[^\.]+)\.ipfs\.[^\/]+(?P<path>\/.+)?")?;

let path = Url::parse(&modified_uri)?
.path_segments()
.map(|segments| segments.collect::<Vec<_>>().join("/"));

if let Some(captures) = re.captures(&path.unwrap_or_default()) {
let cid = captures["cid"].to_string();
let path = captures.name("path").map(|m| m.as_str().to_string());

PARSE_URI_TYPE_COUNT.with_label_values(&["ipfs"]).inc();
Ok(format!(
"{}{}{}",
ipfs_prefix,
cid,
path.unwrap_or_default()
))
} else {
Err(anyhow::anyhow!("Invalid IPFS URI"))
if let Some(captures) = re
.captures(&path.unwrap_or_default())
.or_else(|| redir_re.captures(&modified_uri))
{
return Self::format_capture(captures, ipfs_prefix);
}
Err(anyhow::anyhow!("Invalid IPFS URI"))
}

/// Assembles the final URI from a successful regex match.
///
/// The result is `ipfs_prefix`, followed by the text of the `cid` named
/// group, followed by the text of the `path` named group when that group
/// took part in the match (nothing is appended otherwise).
///
/// `captures` must come from a pattern that defines a `cid` group which
/// participated in the match; looking it up by name panics otherwise.
/// Each call also bumps the `"ipfs"` label of `PARSE_URI_TYPE_COUNT`.
fn format_capture(captures: Captures<'_>, ipfs_prefix: String) -> anyhow::Result<String> {
    // Resolve both groups up front so a missing `cid` fails before the
    // counter is touched.
    let cid = &captures["cid"];
    let trailing = captures.name("path").map_or("", |found| found.as_str());

    PARSE_URI_TYPE_COUNT.with_label_values(&["ipfs"]).inc();

    // The prefix is already owned, so extend it in place.
    let mut uri = ipfs_prefix;
    uri.push_str(cid);
    uri.push_str(trailing);
    Ok(uri)
}
}

Expand Down Expand Up @@ -84,6 +94,11 @@ mod tests {
URIParser::parse(IPFS_PREFIX.to_string(), test_public_gateway_uri_no_path).unwrap();
assert_eq!(parsed_uri, format!("{}{}/{}", IPFS_PREFIX, CID, ""));

// Some submitted URIs are in the redirected format
let test_ipfs_redirect = format!("https://{}.ipfs.re.dir.io/{}", CID, PATH);
let parsed_uri = URIParser::parse(IPFS_PREFIX.to_string(), test_ipfs_redirect).unwrap();
assert_eq!(parsed_uri, format!("{IPFS_PREFIX}{CID}/{PATH}"));

// Public gateway URIs must contain a CID, expect error here
let test_public_gateway_uri_no_cid = format!("https://ipfs.io/ipfs/{}/{}", "", PATH);
let parsed_uri = URIParser::parse(IPFS_PREFIX.to_string(), test_public_gateway_uri_no_cid);
Expand Down

0 comments on commit ee2a636

Please sign in to comment.