2025年12月1日 星期一

Parse QR codes from Taiwanese electronic invoices

Install the libraries:

$ pip install pyzbar pillow
or
$ uv init
$ uv add pyzbar pillow

pyzbar also needs the zbar shared library installed on the system (Debian/Ubuntu):
$ sudo apt-get install libzbar0

Parse the QR code from a JPG file:
import os
import sys
import csv
from PIL import Image
from pyzbar.pyzbar import decode

# Output filename: the CSV file that save_to_csv() appends invoice rows to.
# It is a relative path, so it resolves against the current working directory.
CSV_FILENAME = "invoices.csv"

def parse_invoice_string(record):
    """
    Parse the raw text of an e-invoice QR code into a dictionary.

    Only the text before the first ":" is used, and fields are read at fixed
    character offsets (0-based, end-exclusive):

        [0:10]   invoice number
        [10:17]  invoice date in the ROC calendar (YYYMMDD)
        [29:37]  amount, hexadecimal
        [37:45]  buyer tax ID
        [45:53]  seller tax ID

    Characters [17:29] are not read by this function. The buyer ID comes
    before the seller ID, following the Ministry of Finance QR layout.

    Args:
        record: Decoded QR payload as a string.

    Returns:
        A dict with the keys "invoice_code", "seller_id", "buyer_id",
        "date" (Gregorian "YYYY/MM/DD") and "price" (int), or None when the
        payload is shorter than 53 characters or a numeric field cannot be
        converted.
    """
    record = record.strip().split(":")[0]

    # Filter out short QR codes (like item details)
    if len(record) < 53:
        return None

    invoice_code = record[0:10]
    date_minguo = record[10:17]
    price_hex = record[29:37]
    buyer_id = record[37:45]
    seller_id = record[45:53]

    # Slicing cannot fail; only the two int() conversions can, and they
    # raise ValueError on malformed digits.
    try:
        # Date: ROC -> Gregorian (ROC year 1 is 1912)
        year = int(date_minguo[0:3]) + 1911
        # Price: Hex -> Int
        price = int(price_hex, 16)
    except ValueError as e:
        print(f"⚠️ Parsing Error: {e}")
        return None

    month = date_minguo[3:5]
    day = date_minguo[5:7]

    return {
        "invoice_code": invoice_code,
        "seller_id": seller_id,
        "buyer_id": buyer_id,
        "date": f'{year}/{month}/{day}',
        "price": price
    }

def save_to_csv(data):
    """
    Append a single invoice record to the CSV file named by CSV_FILENAME.

    The header row is written first when the file does not exist yet. The
    columns are: invoice number, seller tax ID, date, price.

    Args:
        data: Dict as returned by parse_invoice_string().
    """
    file_exists = os.path.isfile(CSV_FILENAME)

    with open(CSV_FILENAME, mode='a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Write header only if the file is new
        if not file_exists:
            writer.writerow(["統一發票碼", "賣方統編", "日期", "價錢"])

        # The second column is headed "賣方統編" (seller tax ID), so it must
        # be filled from the seller field.
        writer.writerow([
            data['invoice_code'],
            data['seller_id'],
            data['date'],
            data['price']
        ])
    print(f"💾 Saved to {CSV_FILENAME}")

def scan_and_export(image_path):
    """
    Decode every barcode/QR code in an image and export the invoices found.

    Each decoded payload is passed to parse_invoice_string(); every payload
    that parses is printed and appended to the CSV via save_to_csv().
    Problems are reported on stdout rather than raised.

    Args:
        image_path: Path of the image file to scan.

    Returns:
        None.
    """
    if not os.path.exists(image_path):
        print(f"❌ File not found: {image_path}")
        return

    print(f"🔍 Scanning: {image_path}...")

    try:
        # The context manager closes the image file once decoding is done.
        with Image.open(image_path) as img:
            decoded_objects = decode(img)

        if not decoded_objects:
            print("❌ No QR codes found.")
            return

        valid_count = 0
        for obj in decoded_objects:
            # A payload may contain bytes that are not valid UTF-8
            # (presumably item text in another encoding). Replacing them keeps
            # one such payload from aborting the whole scan; any ASCII text
            # before the bad bytes keeps its character positions.
            raw_data = obj.data.decode('utf-8', errors='replace')
            result = parse_invoice_string(raw_data)

            if result:
                # Print to console
                print(f"✅ Found Invoice: {result['invoice_code']} | ${result['price']}")

                # Save to CSV
                save_to_csv(result)
                valid_count += 1

        if valid_count == 0:
            print("⚠️ QR codes found, but none matched the invoice format.")

    except Exception as e:
        # Top-level boundary of the script: report the failure and carry on.
        print(f"❌ Error: {e}")

# --- Main Execution ---
if __name__ == "__main__":
    # Scan the image named by the first command-line argument, or the
    # default file name when no argument is given.
    target_file = sys.argv[1] if len(sys.argv) > 1 else "my_qr_code.jpg"
    scan_and_export(target_file)



=================================================================


$ tree 01/
01/
├── drive-download-20260129T092115Z-3-001.zip
├── Notes_123.txt
├── Notes_456.txt
...
...

$ cat 01/Notes_123.txt
YD999900001150109209600000064000000691231132055556666s4oOFzB5IMqpbir5l6eEIQ==:*****

parse_invoice.py

import os
import sys
import csv

def parse_invoice_folder(folder_path, output_file="invoices.csv"):
    """
    Convert the invoice QR strings stored in a folder's .txt files to a CSV.

    Every non-empty line of every ``*.txt`` file directly inside
    ``folder_path`` is treated as one QR payload. Only the text before the
    first ":" is used, and fields are read at fixed character offsets
    (0-based, end-exclusive):

        [0:10]   invoice number
        [10:17]  invoice date in the ROC calendar (YYYMMDD)
        [29:37]  amount, hexadecimal
        [45:53]  seller tax ID

    Lines that are too short or whose date/amount cannot be converted are
    reported on stdout and skipped. ``output_file`` is overwritten.

    Args:
        folder_path: Directory containing the .txt files.
        output_file: Path of the CSV file to write.

    Returns:
        None.
    """
    if not os.path.isdir(folder_path):
        print(f"資料夾不存在: {folder_path}")
        return

    # os.listdir() returns names in arbitrary order; sorting makes the row
    # order of the CSV reproducible.
    txt_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".txt"))
    if not txt_files:
        print(f"資料夾內沒有 txt 檔: {folder_path}")
        return

    with open(output_file, mode='w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["統一發票碼", "賣方統編", "日期", "價錢"])

        for txt_file in txt_files:
            file_path = os.path.join(folder_path, txt_file)
            with open(file_path, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    # Drop a leading BOM, then surrounding whitespace/newline
                    line = line.lstrip("\ufeff").strip()
                    if not line:
                        continue

                    # Keep only the part before the first colon
                    record = line.split(":")[0]

                    # All fixed-offset fields must be present
                    if len(record) < 53:
                        print(f"[跳過] 檔案 {txt_file} 行 {line_num} 字串太短: {record}")
                        continue

                    invoice_code = record[0:10]   # invoice number
                    date_minguo = record[10:17]   # invoice date (ROC)
                    price_hex = record[29:37]     # amount, hexadecimal
                    # The seller tax ID is the second of the two 8-digit IDs
                    # that follow the amount (the first is the buyer's).
                    seller_id = record[45:53]

                    # ROC date -> Gregorian date
                    try:
                        year = int(date_minguo[0:3]) + 1911
                    except ValueError:
                        print(f"[錯誤] 日期解析失敗: {date_minguo} 行 {line_num}")
                        continue
                    date_str = f'{year}/{date_minguo[3:5]}/{date_minguo[5:7]}'

                    # Hexadecimal amount -> decimal
                    try:
                        price = int(price_hex, 16)
                    except ValueError:
                        print(f"[錯誤] 價錢解析失敗: {price_hex} 行 {line_num}")
                        continue

                    writer.writerow([invoice_code, seller_id, date_str, price])

    print(f"CSV 檔案已生成: {output_file}")

if __name__ == "__main__":
    # Exactly one argument is expected: the folder that holds the .txt files.
    if len(sys.argv) != 2:
        print("使用方法: python3 parse_invoice.py <資料夾路徑>")
        sys.exit(1)

    folder_path = sys.argv[1]
    parse_invoice_folder(folder_path)

沒有留言:

張貼留言