Skip to content

Commit

Permalink
Merge pull request #30 from openzim/add_inline_css
Browse files Browse the repository at this point in the history
Apply proper CSS for proper page display - step 2
  • Loading branch information
benoit74 authored Oct 10, 2024
2 parents 152e8b7 + 738aa31 commit ff4240b
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 5 deletions.
10 changes: 10 additions & 0 deletions scraper/src/libretexts2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ class LibreTextsParsingError(Exception):


class LibreTextsHome(BaseModel):
    """Data extracted from the home page of a LibreTexts library."""

    # URL of the home page itself; get_home builds it as the library URL plus "/"
    home_url: str
    # presumably the paragraphs of the welcome text block — confirm in extractor
    welcome_text_paragraphs: list[str]
    # URL of the welcome image, extracted from the home page
    welcome_image_url: str
    # URL of the screen CSS found on the home page
    screen_css_url: str
    # URL of the print CSS found on the home page
    print_css_url: str
    # inline CSS code found on the home page, one entry per non-empty
    # <style type="text/css"> tag
    inline_css: list[str]


LibraryPageId = str
Expand Down Expand Up @@ -210,6 +212,8 @@ def get_home(self) -> LibreTextsHome:
welcome_image_url=_get_welcome_image_url_from_home(soup),
screen_css_url=_get_screen_css_url_from_home(soup),
print_css_url=_get_print_css_url_from_home(soup),
inline_css=_get_inline_css_from_home(soup),
home_url=f"{self.library_url}/",
)

def get_deki_token(self) -> str:
Expand Down Expand Up @@ -405,3 +409,9 @@ def _get_screen_css_url_from_home(soup: BeautifulSoup) -> str:
def _get_print_css_url_from_home(soup: BeautifulSoup) -> str:
    """Return the URL of the print CSS referenced by the home page.

    Thin wrapper delegating to the generic CSS URL lookup with the "print"
    selector value.
    """
    css_kind = "print"
    return _get_any_css_url_from_home(soup, css_kind)


def _get_inline_css_from_home(soup: BeautifulSoup) -> list[str]:
    """Collect the inline CSS code embedded in the home page.

    Only ``<style>`` tags declaring ``type="text/css"`` are considered, and
    tags whose text is empty are left out of the result.
    """
    inline_css: list[str] = []
    for style_tag in soup.find_all("style", {"type": "text/css"}):
        css_code = style_tag.text
        if css_code:
            inline_css.append(css_code)
    return inline_css
8 changes: 7 additions & 1 deletion scraper/src/libretexts2zim/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@ def process(self, css_original_url: str, css_content: bytes) -> str:
css_original_url,
rules, # pyright: ignore[reportUnknownArgumentType]
)
return serialize(rules)
return serialize(
[
rule
for rule in rules # pyright: ignore[reportUnknownVariableType]
if not isinstance(rule, ast.ParseError)
]
)

def _process_url(
self, css_original_url: str, css_url: str
Expand Down
6 changes: 6 additions & 0 deletions scraper/src/libretexts2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ def run(self) -> Path:
add_item_for(creator, "content/print.css", content=result)
del print_css

result = css_processor.process(
css_original_url=home.home_url,
css_content=("\n".join(home.inline_css)).encode(),
)
add_item_for(creator, "content/inline.css", content=result)

logger.info(f" Retrieving {len(css_processor.css_assets)} CSS assets...")
for asset_url, asset_path in css_processor.css_assets.items():
try:
Expand Down
10 changes: 10 additions & 0 deletions scraper/tests-integration/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,13 @@ def test_get_home_print_css_url(home: LibreTextsHome):
home.print_css_url
== "https://a.mtstatic.com/@cache/layout/print.css?_=99d83fb44eaebe60981933ec554d138d:site_4038"
)


def test_get_home_inline_css(home: LibreTextsHome):
    """Ensures inline CSS is retrieved from the home page

    Thresholds are deliberately lower than the values observed on the live
    site (see trailing comments), so the test only checks that a substantial
    amount of inline CSS has been found.
    """
    assert len(home.inline_css) >= 10  # 13 expected as of Oct. 2024
    assert len("\n".join(home.inline_css)) >= 35000  # 39843 expected as of Oct. 2024


def test_get_home_url(home: LibreTextsHome, libretexts_url: str):
    """Ensures home URL is the library URL followed by a trailing slash"""
    expected_url = f"{libretexts_url}/"
    assert home.home_url == expected_url
16 changes: 16 additions & 0 deletions scraper/tests/test_css.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@
""",
id="ignore_data",
),
pytest.param(
"""
div {
background-image: url('https://example.com/image.jpg');
}
}/*]]>*/
""",
"https://www.acme.com/styles/main.css",
{"https://example.com/image.jpg": Path("/content/css_assets/image.jpg")},
"""
div {
background-image: url("css_assets/image.jpg");
}
""",
id="ignore_parsing_error",
),
],
)
def test_css_processor_single_doc(
Expand Down
1 change: 1 addition & 0 deletions zimui/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<link rel="icon" href="./favicon.ico" />
<link rel="stylesheet" type="text/css" media="screen" href="./content/screen.css" />
<link rel="stylesheet" type="text/css" media="print" href="./content/print.css" />
<link rel="stylesheet" type="text/css" href="./content/inline.css" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Vite App</title>
</head>
Expand Down
15 changes: 11 additions & 4 deletions zimui/src/views/HomeView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,15 @@ watch(
</script>

<template>
<v-container>
<div v-if="main.pageContent" v-html="main.pageContent.htmlBody"></div>
<div v-else>Page not found</div>
</v-container>
<!-- Reproduce DOM structure of libretexts.org for proper CSS functioning -->
<main class="elm-skin-container">
<article id="elm-main-content" class="elm-content-container">
<section
class="mt-content-container"
v-if="main.pageContent"
v-html="main.pageContent.htmlBody"
></section>
<div v-else>Page not found</div>
</article>
</main>
</template>

0 comments on commit ff4240b

Please sign in to comment.