17 参考文献
17.1 批量生成参考文献列表
- 适用于revealjs
import os
from urllib.parse import quote

# Print one Markdown bullet per PDF in data-doctor, linking to the file.
# Sorted so that the generated list is stable between runs.
for f in sorted(os.listdir('data-doctor')):
    if f.endswith('.pdf'):
        # os.listdir already yields bare file names, so the name is the label.
        # The link target is percent-encoded so that spaces or parentheses in
        # a file name cannot terminate the Markdown link early.
        print(f"- [{f}](data-doctor/{quote(f)})")
- 适用于books项目
#| echo: false
from IPython.display import Markdown
import os
from urllib.parse import quote

file_folder = 'data/恒格列净复审'
abs_path = "https://www.mmphcrc.com/pdf/jupyter/HuLinhuiPy/ethics"

# Build one Markdown bullet per directory entry, linking to the hosted copy.
# Sorted so that the generated list is stable between runs.
md_lines = []
for n, f in enumerate(sorted(os.listdir(file_folder)), start=1):
    if f != ".ipynb_checkpoints":
        # Percent-encode both the folder and the file name: the folder part
        # contains non-ASCII characters too ("/" is left as-is by quote).
        md_lines.append(f"- [{f}]({abs_path}/{quote(file_folder)}/{quote(f)})\n")
    # NOTE(review): this emits a single blank line after the tenth directory
    # entry only; confirm whether a break after every tenth entry was intended.
    if n == 10:
        md_lines.append("\n")
mdStr = "".join(md_lines)

# Must stay the last expression so that the notebook renders the Markdown.
Markdown(mdStr)
17.2 jupyterlab生成交互搜索的表格
通过关键词搜索定位至pdf文件名称、页码和行数
import os
import PyPDF2
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import re
def extract_text_from_pdf(pdf_path):
    """Extract the text of one PDF as a table with one row per text line.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A pandas DataFrame with the columns '文件名' (the path that was
        passed in), '页码' (1-based page number), '行' (1-based line number
        within the page) and '文本' (the text of that line).
    """
    columns = ['文件名', '页码', '行', '文本']
    rows = []
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        for page_num, page in enumerate(reader.pages, start=1):
            # Fall back to an empty string if extraction yields nothing, so
            # that the split below never runs on None.
            text = page.extract_text() or ''
            for line_num, line in enumerate(text.split('\n'), start=1):
                rows.append((pdf_path, page_num, line_num, line))
    return pd.DataFrame(rows, columns=columns)
def extract_text_from_pdfs(folder_path):
    """Extract the text of every PDF directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A single pandas DataFrame that concatenates the per-file tables
        returned by ``extract_text_from_pdf``, with a fresh 0..n-1 index.
        If the folder holds no PDF files, an empty DataFrame with the
        columns '文件名', '页码', '行', '文本' is returned.
    """
    dfs = [
        extract_text_from_pdf(os.path.join(folder_path, filename))
        # Sorted for a reproducible row order; the extension check is
        # case-insensitive so that "REPORT.PDF" is picked up as well.
        for filename in sorted(os.listdir(folder_path))
        if filename.lower().endswith('.pdf')
    ]
    if not dfs:
        # pd.concat raises ValueError on an empty list, so hand back an
        # empty table with the expected columns instead.
        return pd.DataFrame(columns=['文件名', '页码', '行', '文本'])
    return pd.concat(dfs, ignore_index=True)
# Index every PDF in the folder once; the search callbacks filter this frame.
folder_path = 'data-doctor'
df = extract_text_from_pdfs(folder_path)

# Do not truncate long cell contents when the table is rendered.
pd.set_option('display.max_colwidth', None)

# Text box used to enter the search keyword.
search_box = widgets.Text(description='搜索:')
# Page selector; the total starts at 1 and is raised once a search has run.
page_text = widgets.BoundedIntText(description='页数', min=1, max=1, step=1, value=1)
prev_button = widgets.Button(description='◀')
next_button = widgets.Button(description='▶')

# Output area that receives the rendered result table.
output = widgets.Output()
def _highlight_matches(text, needle):
    """Return ``text`` as HTML-safe markup with each ``needle`` highlighted."""
    import html

    text = str(text)
    if not needle:
        # Nothing to highlight; an empty needle would otherwise "match"
        # between every character.
        return html.escape(text)
    marked = f'<span style="background-color:yellow">{html.escape(needle)}</span>'
    # Escape the pieces around the matches separately so that the highlight
    # markup is the only raw HTML that ends up in the cell.
    return marked.join(html.escape(piece) for piece in text.split(needle))


def filter_table(search_value, page, page_size=5):
    """Render one page of the rows whose text contains ``search_value``.

    Reads the module-level ``df`` and updates the module-level widgets
    ``page_text`` (range and value) and ``output`` (rendered table), then
    refreshes the total-pages label via ``update_pagination_label``.

    Args:
        search_value: Keyword to look for; matched literally, not as a regex.
        page: 1-based page number requested; clamped to the valid range.
        page_size: Number of rows shown per page.
    """
    # Literal match: characters such as "(" or "?" typed into the search box
    # must not be interpreted as (possibly invalid) regular expressions.
    matches = df[df['文本'].str.contains(search_value, regex=False, na=False)]

    # Ceiling division, but never fewer than one page so that the page widget
    # keeps a valid range even when nothing matches.
    total_pages = max(1, -(-len(matches) // page_size))
    current_page = max(1, min(page, total_pages))
    page_text.max = total_pages
    page_text.value = current_page

    # Slice with the clamped page number so that a stale, too-large page
    # cannot produce an empty table.
    page_start = (current_page - 1) * page_size
    page_rows = matches.iloc[page_start:page_start + page_size].copy()
    # Only the visible rows need highlighting.
    page_rows['文本'] = [
        _highlight_matches(text, search_value) for text in page_rows['文本']
    ]

    with output:
        output.clear_output()
        # escape=False keeps the highlight <span>; the cell text itself was
        # already escaped by _highlight_matches.
        display(HTML(page_rows.to_html(escape=False)))

    # Refresh the "/total" label next to the page selector.
    update_pagination_label()
def prev_page(button_event):
    """Button callback: go back one page unless already on the first page.

    Args:
        button_event: Argument passed in by the button's click handler; unused.
    """
    if page_text.value > 1:
        page_text.value -= 1
def next_page(button_event):
    """Button callback: advance one page unless already on the last page.

    Args:
        button_event: Argument passed in by the button's click handler; unused.
    """
    if page_text.value < page_text.max:
        page_text.value += 1
def update_pagination_label():
    """Show the current total page count (``page_text.max``) as '/N'."""
    pagination_label.value = f'/{page_text.max}'
# Re-render the table whenever the keyword or the page number changes.
def _refresh_table(change):
    """Observer callback: redraw using the widgets' current values."""
    filter_table(search_box.value, page_text.value)


search_box.observe(_refresh_table, names='value')
page_text.observe(_refresh_table, names='value')

prev_button.on_click(prev_page)
next_button.on_click(next_page)

# Keep a named reference to the "/total" label: update_pagination_label()
# and update_pagination_box() both refer to it as `pagination_label`, which
# would otherwise be undefined.
pagination_label = widgets.Label(f'/{page_text.max}')
pagination_box = widgets.HBox([prev_button, widgets.Label('页码:'), page_text, pagination_label])
def update_pagination_box():
    """Set the pagination row to: previous, caption, page selector, total, next."""
    pagination_box.children = [prev_button, widgets.Label('页码:'), page_text, pagination_label, next_button]
# Replace the pagination row's children with the full control set
# (including the total-pages label and the next button) before rendering.
update_pagination_box()
# Render the search box, the pagination row and the results area together.
display(widgets.VBox([search_box, pagination_box, output]))
17.3 引文footnote位置下移
custom.css里加上以下样式内容:
aside {margin-bottom: -200px; /* Default is 0. A negative value moves the citation aside further down, which avoids overlapping the content below. */
}
注意:如果引文条数增加至某一数量时,可能会与上文内容出现重合。