-
Notifications
You must be signed in to change notification settings - Fork 253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding support for pandas dataframes, MultiIndex formatting #1046
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from pandas import MultiIndex | ||
from fpdf import FPDF | ||
|
||
|
||
class FPDF_pandas(FPDF): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that this adapter only adds a single method, I think we should provide a mixin instead, so that class PandasMixin:
def dataframe(self, df, **kwargs):
... And that would be how end-users make use of it: from fpdf import FPDF
from fpdf.pandas import PandasMixin
class MyPDF(FPDF, PandasMixin):
pass
pdf = MyPDF()
pdf.add_page()
pdf.set_font("Times", size=10)
pdf.dataframe(df, ...) What do you think of this approach @afriedman412 🙂? |
||
def __init__(self, **kwargs): | ||
super().__init__(**kwargs) | ||
|
||
def dataframe(self, df, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some docstring would be nice before merging this PR 🙂 (as well as an addition in |
||
with self.table( | ||
num_index_columns=df.index.nlevels, | ||
num_heading_rows=df.columns.nlevels, | ||
**kwargs | ||
) as table: | ||
TABLE_DATA = format_df(df) | ||
for data_row in TABLE_DATA: | ||
row = table.row() | ||
for datum in data_row: | ||
row.cell(datum) | ||
|
||
|
||
def format_df(df, char: str = " ", convert_to_string: bool = True) -> list:
    """
    Flatten a pandas DataFrame into a rectangular list of rows for a table.

    The result starts with one row per column level (the headings), followed
    by one row per DataFrame row. Every row begins with one cell per index
    level: `char` in the heading rows, the index labels in the data rows.

    Args:
        df (pandas.DataFrame): the frame to flatten. Both its index and its
            columns may be a plain `Index` or a `MultiIndex`.
        char (str): filler placed in the top-left corner cells, i.e. above the
            index columns in each heading row. Used as a whole, whatever its
            length (including the empty string).
        convert_to_string (bool): if True, heading and index labels are passed
            through `str()`. Data cells are always converted to strings.

    Returns:
        list: a list of rows, each a list of
        `df.index.nlevels + len(df.columns)` cells.
    """
    num_index_levels = df.index.nlevels
    num_heading_rows = df.columns.nlevels

    # Stringify cell by cell from an object view of the frame: each column
    # keeps its own scalar types (ints are not upcast to floats), and this
    # does not rely on DataFrame.map, which only exists in pandas >= 2.1.
    data = [
        [str(value) for value in row]
        for row in df.astype(object).values.tolist()
    ]

    # get_level_values works for both Index and MultiIndex, and still yields
    # one heading row per level when the frame has no columns.
    heading = [
        list(df.columns.get_level_values(level))
        for level in range(num_heading_rows)
    ]

    # Iterate over the index itself rather than its raw numpy values, so that
    # datetime-like labels stay as pandas scalars instead of integer epochs.
    if isinstance(df.index, MultiIndex):
        index = [list(labels) for labels in df.index]
    else:
        index = [[label] for label in df.index]

    # One filler cell per index level, repeated for every heading row.
    padding = [[char] * num_index_levels for _ in range(num_heading_rows)]

    output = [
        labels + values
        for labels, values in zip(padding + index, heading + data)
    ]
    if convert_to_string:
        output = [[str(item) for item in row] for row in output]
    return output
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
from .util import Padding | ||
|
||
DEFAULT_HEADINGS_STYLE = FontFace(emphasis="BOLD") | ||
DEFAULT_INDEX_STYLE = FontFace(emphasis="BOLD") | ||
|
||
|
||
class Table: | ||
|
@@ -32,6 +33,7 @@ def __init__( | |
gutter_height=0, | ||
gutter_width=0, | ||
headings_style=DEFAULT_HEADINGS_STYLE, | ||
index_style=DEFAULT_INDEX_STYLE, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. New parameters should always be added at the end of the parameters list, otherwise the code of existing align = "CENTER"
v_align = "MIDDLE",
borders_layout = TableBordersLayout.ALL
cell_fill_color = None
cell_fill_mode = TableCellFillMode.NONE
col_widths = None
first_row_as_headings = True
gutter_height = 0
gutter_width = 0
headings_style = DEFAULT_HEADINGS_STYLE
line_height = None
with pdf.table(align, v_align, borders_layout, cell_fill_color, cell_fill_mode, col_widths, first_row_as_headings, gutter_height, gutter_width, headings_style, line_height) as table:
... # this code would break after merging this PR, because line_height would be passed to index_style |
||
line_height=None, | ||
markdown=False, | ||
text_align="JUSTIFY", | ||
|
@@ -40,6 +42,7 @@ def __init__( | |
padding=None, | ||
outer_border_width=None, | ||
num_heading_rows=1, | ||
num_index_columns=0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that those 2 new parameters are only required for rendering pandas dataframes, I think they should not be added there but in |
||
): | ||
""" | ||
Args: | ||
|
@@ -58,6 +61,8 @@ def __init__( | |
gutter_width (float): optional horizontal space between columns | ||
headings_style (fpdf.fonts.FontFace): optional, default to bold. | ||
Defines the visual style of the top headings row: size, color, emphasis... | ||
index_style (fpdf.fonts.FontFace): optional, default to bold. | ||
Defines the visual style of the index columns: size, color, emphasis... | ||
line_height (number): optional. Defines how much vertical space a line of text will occupy | ||
markdown (bool): optional, default to False. Enable markdown interpretation of cells textual content | ||
text_align (str, fpdf.enums.Align, tuple): optional, default to JUSTIFY. Control text alignment inside cells. | ||
|
@@ -72,6 +77,7 @@ def __init__( | |
num_heading_rows (number): optional. Sets the number of heading rows, default value is 1. If this value is not 1, | ||
first_row_as_headings needs to be True if num_heading_rows>1 and False if num_heading_rows=0. For backwards compatibility, | ||
first_row_as_headings is used in case num_heading_rows is 1. | ||
num_index_columns (number): optional. Sets the number of index columns, default value is 0. | ||
""" | ||
self._fpdf = fpdf | ||
self._align = align | ||
|
@@ -85,12 +91,14 @@ def __init__( | |
self._gutter_height = gutter_height | ||
self._gutter_width = gutter_width | ||
self._headings_style = headings_style | ||
self._index_style = index_style | ||
self._line_height = 2 * fpdf.font_size if line_height is None else line_height | ||
self._markdown = markdown | ||
self._text_align = text_align | ||
self._width = fpdf.epw if width is None else width | ||
self._wrapmode = wrapmode | ||
self._num_heading_rows = num_heading_rows | ||
self.num_index_columns = num_index_columns | ||
self._initial_style = None | ||
self.rows = [] | ||
|
||
|
@@ -129,13 +137,16 @@ def __init__( | |
self.row(row) | ||
|
||
def row(self, cells=(), style=None): | ||
"Adds a row to the table. Yields a `Row` object." | ||
"Adds a row to the table. Yields a `Row` object. Styles first `self.num_index_columns` cells with `self.index_style`" | ||
if self._initial_style is None: | ||
self._initial_style = self._fpdf.font_face() | ||
row = Row(self, style=style) | ||
self.rows.append(row) | ||
for cell in cells: | ||
row.cell(cell) | ||
for n, cell in enumerate(cells): | ||
if n < self.num_index_columns: | ||
row.cell(cell, style=self._index_style) | ||
else: | ||
row.cell(cell) | ||
return row | ||
|
||
def render(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this only applies to the pandas adapter, and should probably be removed.
However, it would be nice to add a section about the pandas adapter to this file, as I'm sure many
fpdf2
users would be happy to find out about it while reading this page 🙂