17  参考文献

17.1 批量生成参考文献列表

  • 适用于revealjs
import os
from urllib.parse import quote

# Print one Markdown list item per PDF found in the 'data-doctor' folder.
# os.listdir returns bare entry names in arbitrary order, so the names are
# sorted to get a stable list and used directly as the link label.
for f in sorted(os.listdir('data-doctor')):
    if f.lower().endswith('.pdf'):
        # Percent-encode the file name: spaces or parentheses in the link
        # destination would otherwise break the Markdown link.
        print(f"- [{f}](data-doctor/{quote(f)})")
  • 适用于books项目
#| echo: false
from IPython.display import Markdown
import os
from urllib.parse import quote

file_folder = 'data/恒格列净复审'
abs_path = "https://www.mmphcrc.com/pdf/jupyter/HuLinhuiPy/ethics"
# Percent-encode the folder as well as the file name so the whole URL is
# encoded consistently (quote() leaves '/' untouched by default).
base_url = f"{abs_path}/{quote(file_folder)}"

md_parts = []
# os.listdir returns entries in arbitrary order; sort them for a stable list.
for n, f in enumerate(sorted(os.listdir(file_folder)), start=1):
    if f != ".ipynb_checkpoints":
        # os.listdir yields bare names, so the name itself is the link label.
        md_parts.append(f"- [{f}]({base_url}/{quote(f)})\n")
    if n == 10:
        # Kept from the original cell: a blank line after the 10th directory
        # entry (the Jupyter checkpoint folder counts as an entry).
        md_parts.append("\n")
mdStr = ''.join(md_parts)

# Last expression of the cell: renders the generated Markdown list.
Markdown(mdStr)

17.2 jupyterlab生成交互搜索的表格

通过关键词搜索,定位到对应的 PDF 文件名、页码和行号。

import os
import PyPDF2
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import re

def extract_text_from_pdf(pdf_path):
    """Extract every text line of one PDF into a table.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A DataFrame with one row per extracted line and the columns
        '文件名' (the path that was passed in), '页码' (1-based page
        number), '行' (1-based line number within the page) and
        '文本' (the text of the line).
    """
    columns = ['文件名', '页码', '行', '文本']
    rows = []

    with open(pdf_path, 'rb') as pdf_file:
        pdf = PyPDF2.PdfReader(pdf_file)
        for page_no, pdf_page in enumerate(pdf.pages, start=1):
            # The page text is split on newlines; each piece becomes one row.
            text_lines = pdf_page.extract_text().split('\n')
            rows.extend(
                (pdf_path, page_no, line_no, text)
                for line_no, text in enumerate(text_lines, start=1)
            )

    return pd.DataFrame(rows, columns=columns)

def extract_text_from_pdfs(folder_path):
    """Extract the text lines of every PDF in a folder into one table.

    Args:
        folder_path: Directory whose PDF files (directly inside it, not in
            sub-folders) are read.

    Returns:
        A DataFrame with the columns '文件名', '页码', '行' and '文本',
        holding the concatenated rows produced by extract_text_from_pdf
        for each PDF, with a fresh 0..n-1 index. If the folder contains
        no PDF, an empty DataFrame with those columns is returned.
    """
    # Sort for a deterministic row order (os.listdir order is arbitrary) and
    # match the extension case-insensitively so '.PDF' files are included.
    pdf_names = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith('.pdf')
    )

    if not pdf_names:
        # pd.concat([]) raises ValueError; give callers an empty table with
        # the expected columns instead.
        return pd.DataFrame(columns=['文件名', '页码', '行', '文本'])

    frames = [
        extract_text_from_pdf(os.path.join(folder_path, name))
        for name in pdf_names
    ]
    return pd.concat(frames, ignore_index=True)

# Do not truncate long text cells when the table is rendered.
pd.set_option('display.max_colwidth', None)

# Load every line of every PDF in the folder into one table.
folder_path = 'data-doctor'
df = extract_text_from_pdfs(folder_path)

# Area in which the filtered table is rendered.
output = widgets.Output()

# Text box used to enter the search keyword.
search_box = widgets.Text(description='搜索:')

# Page selector; the total number of pages is initially 1.
page_text = widgets.BoundedIntText(
    value=1,
    min=1,
    max=1,
    step=1,
    description='页数',
)

# Previous / next page buttons.
prev_button = widgets.Button(description='◀')
next_button = widgets.Button(description='▶')

def filter_table(search_value, page, page_size=5):
    """Render one page of the rows whose text contains ``search_value``.

    Reads the module-level ``df`` table, updates the ``page_text`` widget
    (its ``max`` and ``value``), replaces the content of ``output`` with
    the requested page as an HTML table in which every occurrence of the
    search text is highlighted, and finally refreshes the total-page label
    through ``update_pagination_label``.

    Args:
        search_value: Text to look for in the '文本' column. It is matched
            literally, not as a regular expression. An empty string
            matches every row and highlights nothing.
        page: 1-based page number to show; values outside the valid range
            are clamped to it.
        page_size: Number of rows per page (default 5).
    """
    import html  # local import: only this function needs it

    # Literal match: with the default regex matching, input such as '(' or
    # '+' raises re.error or matches something other than what was typed.
    matches = df[df['文本'].str.contains(search_value, regex=False, na=False)]

    # Ceiling division with a minimum of one page. `len // size + 1` would
    # add an empty trailing page whenever the count is a multiple of size.
    total_pages = max(1, (len(matches) + page_size - 1) // page_size)
    # Clamp once and use the clamped value everywhere below, so that an
    # out-of-range page never renders an empty table.
    page = max(1, min(page, total_pages))
    page_text.max = total_pages
    page_text.value = page

    page_start = (page - 1) * page_size
    # Only the visible rows are highlighted; copy so `df` is left untouched.
    page_rows = matches.iloc[page_start:page_start + page_size].copy()

    if search_value:
        marker = (
            '<span style="background-color:yellow">'
            f'{html.escape(search_value)}</span>'
        )

        def _highlight(text):
            # Escape the pieces around each hit separately so that text
            # coming from the PDF can never inject markup of its own.
            return marker.join(
                html.escape(part) for part in text.split(search_value)
            )

        page_rows['文本'] = page_rows['文本'].map(_highlight)
    else:
        # Nothing to highlight; still escape the raw PDF text.
        page_rows['文本'] = page_rows['文本'].map(html.escape)

    with output:
        output.clear_output()
        # escape=False keeps the highlight <span> as real markup; the text
        # column itself has already been escaped above.
        display(HTML(page_rows.to_html(escape=False)))

    update_pagination_label()  # refresh the "/<total pages>" label

def prev_page(button_event):
    """Button handler: go back one page unless already on the first page.

    Args:
        button_event: Argument passed by the button's click callback; unused.
    """
    if page_text.value <= 1:
        return
    page_text.value -= 1

def next_page(button_event):
    """Button handler: advance one page unless already on the last page.

    Args:
        button_event: Argument passed by the button's click callback; unused.
    """
    if page_text.value >= page_text.max:
        return
    page_text.value += 1

def update_pagination_label():
    """Show the current total page count as '/<max>' in the label widget.

    Reads ``page_text.max`` and writes to the module-level
    ``pagination_label`` widget.
    """
    total_pages = page_text.max
    pagination_label.value = f'/{total_pages}'

def _on_query_change(change):
    """Re-render the table with the current search text and page number."""
    filter_table(search_box.value, page_text.value)


# Refresh the table whenever the search text or the page number changes.
search_box.observe(_on_query_change, names='value')
page_text.observe(_on_query_change, names='value')
prev_button.on_click(prev_page)
next_button.on_click(next_page)

# Keep a named reference to the "/<total pages>" label:
# update_pagination_label() and update_pagination_box() both look it up as
# `pagination_label`. It used to be created anonymously inside the HBox,
# which left that name undefined (NameError).
pagination_label = widgets.Label(f'/{page_text.max}')

pagination_box = widgets.HBox([
    prev_button,
    widgets.Label('页码:'),
    page_text,
    pagination_label,
    next_button,
])

def update_pagination_box():
    """Rebuild the pagination row.

    Sets the children of ``pagination_box`` to: previous button, a fresh
    '页码:' caption, the page selector, the total-page label and the next
    button, in that order.
    """
    caption = widgets.Label('页码:')
    row = [prev_button, caption, page_text, pagination_label, next_button]
    pagination_box.children = row

# Fill the pagination row with its final set of widgets.
update_pagination_box()

# Stack search box, pagination row and result area vertically and show them.
search_ui = widgets.VBox([search_box, pagination_box, output])
display(search_ui)

17.3 引文footnote位置下移

custom.css里加上以下样式内容:

aside {
    margin-bottom: -200px;  /*-- Default is 0. A negative value moves the citation (footnote) down a little; per the original note, this avoids overlap with the text that follows. --*/
}

注意:如果引文条数增加到一定数量,可能会与上文内容出现重合。