Skip to content

Commit

Permalink
Merge pull request #303 from topikachu/feature/tika
Browse files Browse the repository at this point in the history
Feat: Use tika to detect content type more accurately
  • Loading branch information
jglick authored Aug 1, 2022
2 parents 282a772 + 9627797 commit 6c461c3
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 2 deletions.
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
<artifactId>jaxb</artifactId>
<version>2.3.6-1</version>
</dependency>
<!-- Apache Tika core: provides org.apache.tika.Tika, which TikaUtil uses to detect a file's content type. -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.18</version>
</dependency>
<dependency>
<groupId>org.apache.jclouds.provider</groupId>
<artifactId>aws-s3</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;

import static io.jenkins.plugins.artifact_manager_jclouds.TikaUtil.detectByTika;

/**
* Jenkins artifact/stash implementation using any blob store supported by Apache jclouds.
* To offer a new backend, implement {@link BlobStoreProvider}.
Expand Down Expand Up @@ -159,6 +161,9 @@ public Map<String, String> invoke(File f, VirtualChannel channel) {
if (contentType == null) {
contentType = URLConnection.guessContentTypeFromName(theFile.getName());
}
if (contentType == null) {
contentType = detectByTika(theFile);
}
contentTypes.put(relPath, contentType);
} catch (IOException e) {
Functions.printStackTrace(e, listener.error("Unable to determine content type for file: " + theFile));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package io.jenkins.plugins.artifact_manager_jclouds;

import org.apache.tika.Tika;

import java.io.File;
import java.io.IOException;

/**
 * Static helper that asks Apache Tika for the media type of a file.
 *
 * <p>The artifact manager calls {@link #detectByTika(File)} as a fallback once its
 * other content-type lookups have returned {@code null}.
 */
public class TikaUtil {

    /** Shared Tika facade, created once at class initialization and never reassigned. */
    private static final Tika TIKA = new Tika();

    /** Not instantiable: this class only exposes static helpers. */
    private TikaUtil() {
    }

    /**
     * Detects the media type of the given file by delegating to {@link Tika#detect(File)}.
     *
     * @param f the file to inspect
     * @return the media type string reported by Tika
     * @throws IOException if Tika cannot read the file
     */
    static String detectByTika(File f) throws IOException {
        return TIKA.detect(f);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,13 @@ public void nonAdmin() throws Exception {
public void contentType() throws Exception {
String text = "some regular text";
String html = "<html><header></header><body>Test file contents</body></html>";

String json = "{\"key\":\"value\"}";
ArtifactManagerConfiguration.get().getArtifactManagerFactories().add(getArtifactManagerFactory(null, null));

j.createSlave("remote", null, null);

WorkflowJob p = j.createProject(WorkflowJob.class, "p");
p.setDefinition(new CpsFlowDefinition("node('remote') {writeFile file: 'f.txt', text: '" + text + "'; writeFile file: 'f.html', text: '" + html + "'; writeFile file: 'f', text: '\\u0000'; archiveArtifacts 'f*'}", true));
p.setDefinition(new CpsFlowDefinition("node('remote') {writeFile file: 'f.txt', text: '" + text + "'; writeFile file: 'f.html', text: '" + html + "'; writeFile file: 'f', text: '\\u0000';writeFile file: 'f.json', text: '" + json +"'; archiveArtifacts 'f*'}", true));
j.buildAndAssertSuccess(p);

WebResponse response = j.createWebClient().goTo("job/p/1/artifact/f.txt", null).getWebResponse();
Expand All @@ -301,6 +301,9 @@ public void contentType() throws Exception {
response = j.createWebClient().goTo("job/p/1/artifact/f", null).getWebResponse();
assertThat(response.getContentLength(), equalTo(1L));
assertThat(response.getContentType(), containsString("/octet-stream"));
response = j.createWebClient().goTo("job/p/1/artifact/f.json", null).getWebResponse();
assertThat(response.getContentAsString(), equalTo(json));
assertThat(response.getContentType(), equalTo("application/json"));
}

//@Test
Expand Down

0 comments on commit 6c461c3

Please sign in to comment.