pax_global_header 0000666 0000000 0000000 00000000064 15007276064 0014521 g ustar 00root root 0000000 0000000 52 comment=d892d2ae49cd29b9639e73cd13017cfeef507ac7
sqlite-utils-4.0a0/ 0000775 0000000 0000000 00000000000 15007276064 0014244 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/.github/ 0000775 0000000 0000000 00000000000 15007276064 0015604 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/.github/FUNDING.yml 0000664 0000000 0000000 00000000021 15007276064 0017412 0 ustar 00root root 0000000 0000000 github: [simonw]
sqlite-utils-4.0a0/.github/workflows/ 0000775 0000000 0000000 00000000000 15007276064 0017641 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/.github/workflows/codeql-analysis.yml 0000664 0000000 0000000 00000004277 15007276064 0023466 0 ustar 00root root 0000000 0000000 name: "CodeQL"
on:
  push:
    branches: [main]
  schedule:
    # Weekly scan: Fridays at 04:00 (cron is evaluated in UTC).
    - cron: '0 4 * * 5'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # Least-privilege token: CodeQL only needs to read the repository and
    # upload its findings to the code-scanning API.
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        # Override automatic language detection by changing the below list
        # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
        language: ['python']
        # Learn more...
        # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection

    steps:
      # This workflow only runs on push and on a schedule, so the old
      # "fetch-depth: 2" + "git checkout HEAD^2" pull-request workaround is
      # not needed (and current CodeQL releases advise against it).
      - name: Checkout repository
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main

      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 https://git.io/JvXDl

      # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
      #    and modify them (or add more) to build your code if your project
      #    uses a compiled language

      #- run: |
      #   make bootstrap
      #   make release

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
sqlite-utils-4.0a0/.github/workflows/documentation-links.yml 0000664 0000000 0000000 00000000451 15007276064 0024353 0 ustar 00root root 0000000 0000000 name: Read the Docs Pull Request Preview
# Run the Read the Docs preview action once, when a pull request is opened.
on:
  pull_request_target:
    types: [opened]

# The job's token is limited to write access on pull requests.
permissions:
  pull-requests: write

jobs:
  documentation-links:
    runs-on: ubuntu-latest
    steps:
      - uses: readthedocs/actions/preview@v1
        with:
          project-slug: sqlite-utils
sqlite-utils-4.0a0/.github/workflows/publish.yml 0000664 0000000 0000000 00000003007 15007276064 0022032 0 ustar 00root root 0000000 0000000 name: Publish Python Package
on:
  release:
    types: [created]

jobs:
  # Run the test suite on every supported Python/OS pair before publishing.
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, windows-latest, macos-latest]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          # Use setup-python's built-in pip cache: it resolves the correct
          # cache directory on every OS, whereas a hard-coded ~/.cache/pip
          # only matches pip's cache location on Linux.
          cache: pip
          cache-dependency-path: setup.py
      - name: Install dependencies
        run: |
          pip install -e '.[test]'
      - name: Run tests
        run: |
          pytest

  # Build and upload to PyPI; only runs once every test job has passed.
  deploy:
    runs-on: ubuntu-latest
    needs: [test]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: pip
          cache-dependency-path: setup.py
      - name: Install dependencies
        run: |
          pip install setuptools wheel twine
      - name: Publish
        env:
          # "__token__" tells twine the password is a PyPI API token.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python setup.py sdist bdist_wheel
          twine upload dist/*
sqlite-utils-4.0a0/.github/workflows/spellcheck.yml 0000664 0000000 0000000 00000001365 15007276064 0022506 0 ustar 00root root 0000000 0000000 name: Check spelling in documentation
on:
  - push
  - pull_request

jobs:
  spellcheck:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Cache pip downloads; the key changes whenever a setup.py changes,
      # with a fallback to the most recent cache for this OS.
      - name: Configure pip caching
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: pip install -e '.[docs]'

      # Check both the documentation sources and the package itself,
      # sharing one ignore list.
      - name: Check spelling
        run: |
          codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt
          codespell sqlite_utils --ignore-words docs/codespell-ignore-words.txt
sqlite-utils-4.0a0/.github/workflows/test-coverage.yml 0000664 0000000 0000000 00000002147 15007276064 0023140 0 ustar 00root root 0000000 0000000 name: Calculate test coverage
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - uses: actions/cache@v4
        name: Configure pip caching
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install SpatiaLite
        # Refresh the package index first so the install does not fail on a
        # stale mirror listing; -y keeps apt non-interactive.
        run: |
          sudo apt-get update
          sudo apt-get install -y libsqlite3-mod-spatialite
      - name: Install Python dependencies
        # The extras spec is quoted so the shell never treats [test] as a glob.
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e '.[test]'
          python -m pip install pytest-cov
      - name: Run tests
        # The directory listings before/after show whether coverage.xml
        # was actually produced.
        run: |-
          ls -lah
          pytest --cov=sqlite_utils --cov-report xml:coverage.xml --cov-report term
          ls -lah
      - name: Upload coverage report
        # v1 wrapped the retired Codecov bash uploader; current releases use
        # the "files" input instead of the legacy "file".
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: coverage.xml
sqlite-utils-4.0a0/.github/workflows/test-sqlite-support.yml 0000664 0000000 0000000 00000002317 15007276064 0024357 0 ustar 00root root 0000000 0000000 name: Test SQLite versions
on:
  - push
  - pull_request

permissions:
  contents: read

jobs:
  test:
    runs-on: ${{ matrix.platform }}
    # A failure against one SQLite version is reported but does not fail
    # the workflow run.
    continue-on-error: true
    strategy:
      matrix:
        platform:
          - ubuntu-latest
        python-version:
          - "3.9"
        sqlite-version:
          - "3.46"
          - "3.23.1"  # 2018-04-10, before UPSERT
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          allow-prereleases: true
          cache: pip
          cache-dependency-path: setup.py

      # The action is pinned to an exact commit rather than a tag.
      - name: Set up SQLite ${{ matrix.sqlite-version }}
        uses: asg017/sqlite-versions@71ea0de37ae739c33e447af91ba71dda8fcf22e6
        with:
          version: ${{ matrix.sqlite-version }}
          cflags: '-DSQLITE_ENABLE_DESERIALIZE -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_FTS3_PARENTHESIS -DSQLITE_ENABLE_RTREE -DSQLITE_ENABLE_JSON1'

      # Print the SQLite version Python is actually linked against.
      - run: python3 -c "import sqlite3; print(sqlite3.sqlite_version)"

      - name: Install dependencies
        run: |
          pip install -e '.[test]'
          pip freeze

      - name: Run tests
        run: |
          python -m pytest
sqlite-utils-4.0a0/.github/workflows/test.yml 0000664 0000000 0000000 00000003570 15007276064 0021350 0 ustar 00root root 0000000 0000000 name: Test
on: [push, pull_request]

env:
  FORCE_COLOR: 1

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        # 1 = also install numpy, 0 = run without it.
        numpy: [0, 1]
        os: [ubuntu-latest, macos-latest, windows-latest, macos-14]
        # Skip 3.9 on macos-14 - it only has 3.10+
        exclude:
          - python-version: "3.9"
            os: macos-14
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          allow-prereleases: true
          # Use setup-python's built-in pip cache: it resolves the correct
          # cache directory on every OS, whereas a hard-coded ~/.cache/pip
          # only matches pip's cache location on Linux.
          cache: pip
          cache-dependency-path: setup.py
      - name: Install dependencies
        run: |
          pip install -e '.[test,mypy,flake8]'
      - name: Optionally install tui dependencies
        run: pip install -e '.[tui]'
      - name: Optionally install numpy
        if: matrix.numpy == 1
        run: pip install numpy
      - name: Install SpatiaLite
        if: matrix.os == 'ubuntu-latest'
        run: sudo apt-get install libsqlite3-mod-spatialite
      - name: On macOS with Python 3.10 test with sqlean.py
        if: matrix.os == 'macos-latest' && matrix.python-version == '3.10'
        run: pip install sqlean.py sqlite-dump
      - name: Build extension for --load-extension test
        if: matrix.os == 'ubuntu-latest'
        run: |-
          (cd tests && gcc ext.c -fPIC -shared -o ext.so && ls -lah)
      - name: Run tests
        run: |
          pytest -v
      - name: run mypy
        run: mypy sqlite_utils tests
      - name: run flake8
        run: flake8
      - name: Check formatting
        run: black . --check
      - name: Check if cog needs to be run
        run: |
          cog --check README.md docs/*.rst
sqlite-utils-4.0a0/.gitignore 0000664 0000000 0000000 00000000334 15007276064 0016234 0 ustar 00root root 0000000 0000000 .venv
build
*.db
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.mypy_cache
.coverage
.schema
.vscode
.hypothesis
Pipfile
Pipfile.lock
pyproject.toml
tests/*.dylib
tests/*.so
tests/*.dll
sqlite-utils-4.0a0/.gitpod.yml 0000664 0000000 0000000 00000000044 15007276064 0016331 0 ustar 00root root 0000000 0000000 tasks:
- init: pip install '.[test]' sqlite-utils-4.0a0/.readthedocs.yaml 0000664 0000000 0000000 00000000323 15007276064 0017471 0 ustar 00root root 0000000 0000000 version: 2
# Sphinx project configuration file.
sphinx:
  configuration: docs/conf.py

# Build environment.
build:
  os: ubuntu-22.04
  tools:
    python: '3.11'

# Install this package with its "docs" extra.
python:
  install:
    - method: pip
      path: .
      extra_requirements: [docs]

# Additional output formats to build.
formats: [pdf, epub]
sqlite-utils-4.0a0/Justfile 0000664 0000000 0000000 00000001236 15007276064 0015756 0 ustar 00root root 0000000 0000000 # Run tests and linters
@default: test lint

# Setup project
@init:
  pipenv run pip install -e '.[test,docs,mypy,flake8]'

# Run pytest with supplied options
@test *options:
  pipenv run pytest {{options}}

# Run linters: black, flake8, mypy, cog, codespell
@lint:
  pipenv run black . --check
  pipenv run flake8
  pipenv run mypy sqlite_utils tests
  pipenv run cog --check README.md docs/*.rst
  pipenv run codespell docs/*.rst --ignore-words docs/codespell-ignore-words.txt
  pipenv run codespell sqlite_utils --ignore-words docs/codespell-ignore-words.txt

# Rebuild docs with cog
@cog:
  pipenv run cog -r README.md docs/*.rst

# Serve live docs on localhost:8000
@docs: cog
  cd docs && pipenv run make livehtml

# Apply Black
@black:
  pipenv run black .
sqlite-utils-4.0a0/LICENSE 0000664 0000000 0000000 00000026135 15007276064 0015260 0 ustar 00root root 0000000 0000000 Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
sqlite-utils-4.0a0/MANIFEST.in 0000664 0000000 0000000 00000000134 15007276064 0016000 0 ustar 00root root 0000000 0000000 include LICENSE
include README.md
recursive-include docs *.rst
recursive-include tests *.py
sqlite-utils-4.0a0/README.md 0000664 0000000 0000000 00000012104 15007276064 0015521 0 ustar 00root root 0000000 0000000 # sqlite-utils
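[![PyPI](https://img.shields.io/pypi/v/sqlite-utils.svg)](https://pypi.org/project/sqlite-utils/)
[![Changelog](https://img.shields.io/github/v/release/simonw/sqlite-utils?include_prereleases&label=changelog)](https://sqlite-utils.datasette.io/en/stable/changelog.html)
[![Python 3.x](https://img.shields.io/pypi/pyversions/sqlite-utils.svg?logo=python&logoColor=white)](https://pypi.org/project/sqlite-utils/)
[![Tests](https://github.com/simonw/sqlite-utils/workflows/Test/badge.svg)](https://github.com/simonw/sqlite-utils/actions?query=workflow%3ATest)
[![Documentation Status](https://readthedocs.org/projects/sqlite-utils/badge/?version=stable)](http://sqlite-utils.datasette.io/en/stable/?badge=stable)
[![codecov](https://codecov.io/gh/simonw/sqlite-utils/branch/main/graph/badge.svg)](https://codecov.io/gh/simonw/sqlite-utils)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/sqlite-utils/blob/main/LICENSE)
[![discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://discord.gg/Ass7bCAMDw)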
Python CLI utility and library for manipulating SQLite databases.
## Some feature highlights
- [Pipe JSON](https://sqlite-utils.datasette.io/en/stable/cli.html#inserting-json-data) (or [CSV or TSV](https://sqlite-utils.datasette.io/en/stable/cli.html#inserting-csv-or-tsv-data)) directly into a new SQLite database file, automatically creating a table with the appropriate schema
- [Run in-memory SQL queries](https://sqlite-utils.datasette.io/en/stable/cli.html#querying-data-directly-using-an-in-memory-database), including joins, directly against data in CSV, TSV or JSON files and view the results
- [Configure SQLite full-text search](https://sqlite-utils.datasette.io/en/stable/cli.html#configuring-full-text-search) against your database tables and run search queries against them, ordered by relevance
- Run [transformations against your tables](https://sqlite-utils.datasette.io/en/stable/cli.html#transforming-tables) to make schema changes that SQLite `ALTER TABLE` does not directly support, such as changing the type of a column
- [Extract columns](https://sqlite-utils.datasette.io/en/stable/cli.html#extracting-columns-into-a-separate-table) into separate tables to better normalize your existing data
- [Install plugins](https://sqlite-utils.datasette.io/en/stable/plugins.html) to add custom SQL functions and additional features
Read more on my blog, in this series of posts on [New features in sqlite-utils](https://simonwillison.net/series/sqlite-utils-features/) and other [entries tagged sqliteutils](https://simonwillison.net/tags/sqliteutils/).
## Installation
pip install sqlite-utils
Or if you use [Homebrew](https://brew.sh/) for macOS:
brew install sqlite-utils
## Using as a CLI tool
Now you can do things with the CLI utility like this:
$ sqlite-utils memory dogs.csv "select * from t"
[{"id": 1, "age": 4, "name": "Cleo"},
{"id": 2, "age": 2, "name": "Pancakes"}]
$ sqlite-utils insert dogs.db dogs dogs.csv --csv
[####################################] 100%
$ sqlite-utils tables dogs.db --counts
[{"table": "dogs", "count": 2}]
$ sqlite-utils dogs.db "select id, name from dogs"
[{"id": 1, "name": "Cleo"},
{"id": 2, "name": "Pancakes"}]
$ sqlite-utils dogs.db "select * from dogs" --csv
id,age,name
1,4,Cleo
2,2,Pancakes
$ sqlite-utils dogs.db "select * from dogs" --table
id age name
---- ----- --------
1 4 Cleo
2 2 Pancakes
You can import JSON data into a new database table like this:
$ curl https://api.github.com/repos/simonw/sqlite-utils/releases \
| sqlite-utils insert releases.db releases - --pk id
Or for data in a CSV file:
$ sqlite-utils insert dogs.db dogs dogs.csv --csv
`sqlite-utils memory` lets you import CSV or JSON data into an in-memory database and run SQL queries against it in a single command:
$ cat dogs.csv | sqlite-utils memory - "select name, age from stdin"
See the [full CLI documentation](https://sqlite-utils.datasette.io/en/stable/cli.html) for comprehensive coverage of many more commands.
## Using as a library
You can also `import sqlite_utils` and use it as a Python library like this:
```python
import sqlite_utils
db = sqlite_utils.Database("demo_database.db")
# This line creates a "dogs" table if one does not already exist:
db["dogs"].insert_all([
{"id": 1, "age": 4, "name": "Cleo"},
{"id": 2, "age": 2, "name": "Pancakes"}
], pk="id")
```
Check out the [full library documentation](https://sqlite-utils.datasette.io/en/stable/python-api.html) for everything else you can do with the Python library.
## Related projects
* [Datasette](https://datasette.io/): A tool for exploring and publishing data
* [csvs-to-sqlite](https://github.com/simonw/csvs-to-sqlite): Convert CSV files into a SQLite database
* [db-to-sqlite](https://github.com/simonw/db-to-sqlite): CLI tool for exporting a MySQL or PostgreSQL database as a SQLite file
* [dogsheep](https://dogsheep.github.io/): A family of tools for personal analytics, built on top of `sqlite-utils`
sqlite-utils-4.0a0/codecov.yml 0000664 0000000 0000000 00000000202 15007276064 0016403 0 ustar 00root root 0000000 0000000 coverage:
status:
project:
default:
informational: true
patch:
default:
informational: true
sqlite-utils-4.0a0/docs/ 0000775 0000000 0000000 00000000000 15007276064 0015174 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/docs/.gitignore 0000664 0000000 0000000 00000000007 15007276064 0017161 0 ustar 00root root 0000000 0000000 _build
sqlite-utils-4.0a0/docs/Makefile 0000664 0000000 0000000 00000001322 15007276064 0016632 0 ustar 00root root 0000000 0000000 # Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = sqlite-utils
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# livehtml is a command, not a file, so it is declared phony as well.
.PHONY: help livehtml Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Build HTML with sphinx-autobuild, also watching ../sqlite_utils.
# Uses $(O) (letter O) like the other targets; the previous $(0) (digit
# zero) named a different, unset variable, so options passed as O=... were
# silently dropped.
livehtml:
	sphinx-autobuild -a -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) --watch ../sqlite_utils
sqlite-utils-4.0a0/docs/_static/ 0000775 0000000 0000000 00000000000 15007276064 0016622 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/docs/_static/js/ 0000775 0000000 0000000 00000000000 15007276064 0017236 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/docs/_static/js/custom.js 0000664 0000000 0000000 00000001512 15007276064 0021105 0 ustar 00root root 0000000 0000000 jQuery(function ($) {
// Show banner linking to /stable/ if this is a /latest/ page
if (!/\/latest\//.test(location.pathname)) {
return;
}
var stableUrl = location.pathname.replace("/latest/", "/stable/");
// Check it's not a 404
fetch(stableUrl, { method: "HEAD" }).then((response) => {
if (response.status == 200) {
var warning = $(
`
Note
This documentation covers the development version of sqlite-utils.
See this page for the current stable release.
`
);
warning.find("a").attr("href", stableUrl);
$("article[role=main]").prepend(warning);
}
});
});
sqlite-utils-4.0a0/docs/_templates/ 0000775 0000000 0000000 00000000000 15007276064 0017331 5 ustar 00root root 0000000 0000000 sqlite-utils-4.0a0/docs/_templates/base.html 0000664 0000000 0000000 00000002433 15007276064 0021133 0 ustar 00root root 0000000 0000000 {%- extends "!base.html" %}
{% block site_meta %}
{{ super() }}
{% endblock %}
{% block scripts %}
{{ super() }}
{% endblock %}
sqlite-utils-4.0a0/docs/changelog.rst 0000664 0000000 0000000 00000222451 15007276064 0017663 0 ustar 00root root 0000000 0000000 .. _changelog:
===========
Changelog
===========
.. _v4_0a0:
4.0a0 (2025-05-08)
------------------
- Upsert operations now use SQLite's ``INSERT ... ON CONFLICT ... DO UPDATE SET`` syntax on all SQLite versions later than 3.23.1. This is a very slight breaking change for apps that depend on the previous ``INSERT OR IGNORE`` followed by ``UPDATE`` behavior. (:issue:`652`)
- Python library users can opt-in to the previous implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor, see :ref:`python_api_old_upsert`.
- Dropped support for Python 3.8, added support for Python 3.13. (:issue:`646`)
- ``sqlite-utils tui`` is now provided by the `sqlite-utils-tui `__ plugin. (:issue:`648`)
- Test suite now also runs against SQLite 3.23.1, the last version (from 2018-04-10) before the new ``INSERT ... ON CONFLICT ... DO UPDATE SET`` syntax was added. (:issue:`654`)
.. _v3_38:
3.38 (2024-11-23)
-----------------
- Plugins can now reuse the implementation of the ``sqlite-utils memory`` CLI command with the new ``return_db=True`` parameter. (:issue:`643`)
- ``table.transform()`` now recreates indexes after transforming a table. A new ``sqlite_utils.db.TransformError`` exception is raised if these indexes cannot be recreated due to conflicting changes to the table such as a column rename. Thanks, `Mat Miller `__. (:issue:`633`)
- ``table.search()`` now accepts an ``include_rank=True`` parameter, causing the resulting rows to have a ``rank`` column showing the calculated relevance score. Thanks, `liunux4odoo `__. (`#628 `__)
- Fixed an error that occurred when creating a strict table with at least one floating point column. These ``FLOAT`` columns are now correctly created as ``REAL`` as well, but only for strict tables. (:issue:`644`)
.. _v3_37:
3.37 (2024-07-18)
-----------------
- The ``create-table`` and ``insert-files`` commands both now accept multiple ``--pk`` options for compound primary keys. (:issue:`620`)
- Now tested against Python 3.13 pre-release. (`#619 `__)
- Fixed a crash that can occur in environments with a broken ``numpy`` installation, producing a ``module 'numpy' has no attribute 'int8'`` error. (:issue:`632`)
.. _v3_36:
3.36 (2023-12-07)
-----------------
- Support for creating tables in `SQLite STRICT mode `__. Thanks, `Taj Khattra `__. (:issue:`344`)
- CLI commands ``create-table``, ``insert`` and ``upsert`` all now accept a ``--strict`` option.
- Python methods that can create a table - ``table.create()`` and ``insert/upsert/insert_all/upsert_all`` all now accept an optional ``strict=True`` parameter.
- The ``transform`` command and ``table.transform()`` method preserve strict mode when transforming a table.
- The ``sqlite-utils create-table`` command now accepts ``str``, ``int`` and ``bytes`` as aliases for ``text``, ``integer`` and ``blob`` respectively. (:issue:`606`)
.. _v3_35_2:
3.35.2 (2023-11-03)
-------------------
- The ``--load-extension=spatialite`` option and :ref:`find_spatialite() ` utility function now both work correctly on ``arm64`` Linux. Thanks, `Mike Coats `__. (:issue:`599`)
- Fix for bug where ``sqlite-utils insert`` could cause your terminal cursor to disappear. Thanks, `Luke Plant `__. (:issue:`433`)
- ``datetime.timedelta`` values are now stored as ``TEXT`` columns. Thanks, `Harald Nezbeda `__. (:issue:`522`)
- Test suite is now also run against Python 3.12.
.. _v3_35_1:
3.35.1 (2023-09-08)
-------------------
- Fixed a bug where :ref:`table.transform() ` would sometimes re-assign the ``rowid`` values for a table rather than keeping them consistent across the operation. (:issue:`592`)
.. _v3_35:
3.35 (2023-08-17)
-----------------
Adding foreign keys to a table no longer uses ``PRAGMA writable_schema = 1`` to directly manipulate the ``sqlite_master`` table. This was resulting in errors in some Python installations where the SQLite library was compiled in a way that prevented this from working, in particular on macOS. Foreign keys are now added using the :ref:`table transformation ` mechanism instead. (:issue:`577`)
This new mechanism creates a full copy of the table, so it is likely to be significantly slower for large tables, but will no longer trigger ``table sqlite_master may not be modified`` errors on platforms that do not support ``PRAGMA writable_schema = 1``.
A new plugin, `sqlite-utils-fast-fks `__, is now available for developers who still want to use that faster but riskier implementation.
Other changes:
- The :ref:`table.transform() method ` has two new parameters: ``foreign_keys=`` allows you to replace the foreign key constraints defined on a table, and ``add_foreign_keys=`` lets you specify new foreign keys to add. These complement the existing ``drop_foreign_keys=`` parameter. (:issue:`577`)
- The :ref:`sqlite-utils transform ` command has a new ``--add-foreign-key`` option which can be called multiple times to add foreign keys to a table that is being transformed. (:issue:`585`)
- :ref:`sqlite-utils convert ` now has a ``--pdb`` option for opening a debugger on the first encountered error in your conversion script. (:issue:`581`)
- Fixed a bug where the ``sqlite-utils install -e '.[test]'`` option did not work correctly.
.. _v3_34:
3.34 (2023-07-22)
-----------------
This release introduces a new :ref:`plugin system `. Read more about this in `sqlite-utils now supports plugins `__. (:issue:`567`)
- Documentation describing :ref:`how to build a plugin `.
- Plugin hook: :ref:`plugins_hooks_register_commands`, for plugins to add extra commands to ``sqlite-utils``. (:issue:`569`)
- Plugin hook: :ref:`plugins_hooks_prepare_connection`. Plugins can use this to help prepare the SQLite connection to do things like registering custom SQL functions. Thanks, `Alex Garcia `__. (:issue:`574`)
- ``sqlite_utils.Database(..., execute_plugins=False)`` option for disabling plugin execution. (:issue:`575`)
- ``sqlite-utils install -e path-to-directory`` option for installing editable code. This option is useful during the development of a plugin. (:issue:`570`)
- ``table.create(...)`` method now accepts ``replace=True`` to drop and replace an existing table with the same name, or ``ignore=True`` to silently do nothing if a table already exists with the same name. (:issue:`568`)
- ``sqlite-utils insert ... --stop-after 10`` option for stopping the insert after a specified number of records. Works for the ``upsert`` command as well. (:issue:`561`)
- The ``--csv`` and ``--tsv`` modes for ``insert`` now accept a ``--empty-null`` option, which causes empty strings in the CSV file to be stored as ``null`` in the database. (:issue:`563`)
- New ``db.rename_table(table_name, new_name)`` method for renaming tables. (:issue:`565`)
- ``sqlite-utils rename-table my.db table_name new_name`` command for renaming tables. (:issue:`565`)
- The ``table.transform(...)`` method now takes an optional ``keep_table=new_table_name`` parameter, which will cause the original table to be renamed to ``new_table_name`` rather than being dropped at the end of the transformation. (:issue:`571`)
- Documentation now notes that calling ``table.transform()`` without any arguments will reformat the SQL schema stored by SQLite to be more aesthetically pleasing. (:issue:`564`)
.. _v3_33:
3.33 (2023-06-25)
-----------------
- ``sqlite-utils`` will now use `sqlean.py `__ in place of ``sqlite3`` if it is installed in the same virtual environment. This is useful for Python environments with either an outdated version of SQLite or with restrictions on SQLite such as disabled extension loading or restrictions resulting in the ``sqlite3.OperationalError: table sqlite_master may not be modified`` error. (:issue:`559`)
- New ``with db.ensure_autocommit_off()`` context manager, which ensures that the database is in autocommit mode for the duration of a block of code. This is used by ``db.enable_wal()`` and ``db.disable_wal()`` to ensure they work correctly with ``pysqlite3`` and ``sqlean.py``.
- New ``db.iterdump()`` method, providing an iterator over SQL strings representing a dump of the database. This uses ``sqlite-dump`` if it is available, otherwise falling back on the ``conn.iterdump()`` method from ``sqlite3``. Both ``pysqlite3`` and ``sqlean.py`` omit support for ``iterdump()`` - this method helps paper over that difference.
.. _v3_32_1:
3.32.1 (2023-05-21)
-------------------
- Examples in the :ref:`CLI documentation ` can now all be copied and pasted without needing to remove a leading ``$``. (:issue:`551`)
- Documentation now covers :ref:`installation_completion` for ``bash`` and ``zsh``. (:issue:`552`)
.. _v3_32:
3.32 (2023-05-21)
-----------------
- New experimental ``sqlite-utils tui`` interface for interactively building command-line invocations, powered by `Trogon `__. This requires an optional dependency, installed using ``sqlite-utils install trogon``. (:issue:`545`)
- ``sqlite-utils analyze-tables`` command (:ref:`documentation `) now has a ``--common-limit 20`` option for changing the number of common/least-common values shown for each column. (:issue:`544`)
- ``sqlite-utils analyze-tables --no-most`` and ``--no-least`` options for disabling calculation of most-common and least-common values.
- If a column contains only ``null`` values, ``analyze-tables`` will no longer attempt to calculate the most common and least common values for that column. (:issue:`547`)
- Calling ``sqlite-utils analyze-tables`` with non-existent columns in the ``-c/--column`` option now results in an error message. (:issue:`548`)
- The ``table.analyze_column()`` method (:ref:`documented here `) now accepts ``most_common=False`` and ``least_common=False`` options for disabling calculation of those values.
.. _v3_31:
3.31 (2023-05-08)
-----------------
- Dropped support for Python 3.6. Tests now ensure compatibility with Python 3.11. (:issue:`517`)
- Automatically locates the SpatiaLite extension on Apple Silicon. Thanks, Chris Amico. (`#536 `__)
- New ``--raw-lines`` option for the ``sqlite-utils query`` and ``sqlite-utils memory`` commands, which outputs just the raw value of the first column of every row. (:issue:`539`)
- Fixed a bug where ``table.upsert_all()`` failed if the ``not_null=`` option was passed. (:issue:`538`)
- Fixed a ``ResourceWarning`` when using ``sqlite-utils insert``. (:issue:`534`)
- Now shows a more detailed error message when ``sqlite-utils insert`` is called with invalid JSON. (:issue:`532`)
- ``table.convert(..., skip_false=False)`` and ``sqlite-utils convert --no-skip-false`` options, for avoiding a misfeature where the :ref:`convert() ` mechanism skips rows in the database with a falsey value for the specified column. Fixing this by default would be a backwards-incompatible change and is under consideration for a 4.0 release in the future. (:issue:`527`)
- Tables can now be created with self-referential foreign keys. Thanks, Scott Perry. (`#537 `__)
- ``sqlite-utils transform`` no longer breaks if a table defines default values for columns. Thanks, Kenny Song. (:issue:`509`)
- Fixed a bug where repeated calls to ``table.transform()`` did not work correctly. Thanks, Martin Carpenter. (:issue:`525`)
- Improved error message if ``rows_from_file()`` is passed a non-binary-mode file-like object. (:issue:`520`)
.. _v3_30:
3.30 (2022-10-25)
-----------------
- Now tested against Python 3.11. (:issue:`502`)
- New ``table.search_sql(include_rank=True)`` option, which adds a ``rank`` column to the generated SQL. Thanks, Jacob Chapman. (`#480 `__)
- Progress bars now display for newline-delimited JSON files using the ``--nl`` option. Thanks, Mischa Untaga. (:issue:`485`)
- New ``db.close()`` method. (:issue:`504`)
- Conversion functions passed to :ref:`table.convert(...) ` can now return lists or dictionaries, which will be inserted into the database as JSON strings. (:issue:`495`)
- ``sqlite-utils install`` and ``sqlite-utils uninstall`` commands for installing packages into the same virtual environment as ``sqlite-utils``, :ref:`described here `. (:issue:`483`)
- New :ref:`sqlite_utils.utils.flatten() ` utility function. (:issue:`500`)
- Documentation on :ref:`using Just ` to run tests, linters and build documentation.
- Documentation now covers the :ref:`release_process` for this package.
.. _v3_29:
3.29 (2022-08-27)
-----------------
- The ``sqlite-utils query``, ``memory`` and ``bulk`` commands now all accept a new ``--functions`` option. This can be passed a string of Python code, and any callable objects defined in that code will be made available to SQL queries as custom SQL functions. See :ref:`cli_query_functions` for details. (:issue:`471`)
- ``db[table].create(...)`` method now accepts a new ``transform=True`` parameter. If the table already exists it will be :ref:`transformed ` to match the schema configuration options passed to the function. This may result in columns being added or dropped, column types being changed, column order being updated or not null and default values for columns being set. (:issue:`467`)
- Related to the above, the ``sqlite-utils create-table`` command now accepts a ``--transform`` option.
- New introspection property: ``table.default_values`` returns a dictionary mapping each column name with a default value to the configured default value. (:issue:`475`)
- The ``--load-extension`` option can now be provided a path to a compiled SQLite extension module accompanied by the name of an entrypoint, separated by a colon - for example ``--load-extension ./lines0:sqlite3_lines0_noread_init``. This feature is modelled on code first `contributed to Datasette `__ by Alex Garcia. (:issue:`470`)
- Functions registered using the :ref:`db.register_function() ` method can now have a custom name specified using the new ``db.register_function(fn, name=...)`` parameter. (:issue:`458`)
- :ref:`sqlite-utils rows ` has a new ``--order`` option for specifying the sort order for the returned rows. (:issue:`469`)
- All of the CLI options that accept Python code blocks can now be used to define functions that can access modules imported in that same block of code without needing to use the ``global`` keyword. (:issue:`472`)
- Fixed bug where ``table.extract()`` would not behave correctly for columns containing null values. Thanks, Forest Gregg. (:issue:`423`)
- New tutorial: `Cleaning data with sqlite-utils and Datasette `__ shows how to use ``sqlite-utils`` to import and clean an example CSV file.
- Datasette and ``sqlite-utils`` now have a Discord community. `Join the Discord here `__.
.. _v3_28:
3.28 (2022-07-15)
-----------------
- New :ref:`table.duplicate(new_name) ` method for creating a copy of a table with a matching schema and row contents. Thanks, `David `__. (:issue:`449`)
- New ``sqlite-utils duplicate data.db table_name new_name`` CLI command for :ref:`cli_duplicate_table`. (:issue:`454`)
- ``sqlite_utils.utils.rows_from_file()`` is now a :ref:`documented API `. It can be used to read a sequence of dictionaries from a file-like object containing CSV, TSV, JSON or newline-delimited JSON. It can be passed an explicit format or can attempt to detect the format automatically. (:issue:`443`)
- ``sqlite_utils.utils.TypeTracker`` is now a documented API for detecting the likely column types for a sequence of string rows, see :ref:`python_api_typetracker`. (:issue:`445`)
- ``sqlite_utils.utils.chunks()`` is now a documented API for :ref:`splitting an iterator into chunks `. (:issue:`451`)
- ``sqlite-utils enable-fts`` now has a ``--replace`` option for replacing the existing FTS configuration for a table. (:issue:`450`)
- The ``create-index``, ``add-column`` and ``duplicate`` commands all now take a ``--ignore`` option for ignoring errors should the database not be in the right state for them to operate. (:issue:`450`)
.. _v3_27:
3.27 (2022-06-14)
-----------------
See also `the annotated release notes `__ for this release.
- Documentation now uses the `Furo `__ Sphinx theme. (:issue:`435`)
- Code examples in documentation now have a "copy to clipboard" button. (:issue:`436`)
- ``sqlite_utils.utils.rows_from_file()`` is now a documented API, see :ref:`python_api_rows_from_file`. (:issue:`443`)
- ``rows_from_file()`` has two new parameters to help handle CSV files with rows that contain more values than are listed in that CSV file's headings: ``ignore_extras=True`` and ``extras_key="name-of-key"``. (:issue:`440`)
- ``sqlite_utils.utils.maximize_csv_field_size_limit()`` helper function for increasing the field size limit for reading CSV files to its maximum, see :ref:`python_api_maximize_csv_field_size_limit`. (:issue:`442`)
- ``table.search(where=, where_args=)`` parameters for adding additional ``WHERE`` clauses to a search query. The ``where=`` parameter is available on ``table.search_sql(...)`` as well. See :ref:`python_api_fts_search`. (:issue:`441`)
- Fixed bug where ``table.detect_fts()`` and other search-related functions could fail if two FTS-enabled tables had names that were prefixes of each other. (:issue:`434`)
.. _v3_26_1:
3.26.1 (2022-05-02)
-------------------
- Now depends on `click-default-group-wheel `__, a pure Python wheel package. This means you can install and use this package with `Pyodide `__, which can run Python entirely in your browser using WebAssembly. (`#429 `__)
Try that out using the `Pyodide REPL `__:
.. code-block:: python
>>> import micropip
>>> await micropip.install("sqlite-utils")
>>> import sqlite_utils
>>> db = sqlite_utils.Database(memory=True)
>>> list(db.query("select 3 * 5"))
[{'3 * 5': 15}]
.. _v3_26:
3.26 (2022-04-13)
-----------------
- New ``errors=r.IGNORE/r.SET_NULL`` parameter for the ``r.parsedatetime()`` and ``r.parsedate()`` :ref:`convert recipes `. (:issue:`416`)
- Fixed a bug where ``--multi`` could not be used in combination with ``--dry-run`` for the :ref:`convert ` command. (:issue:`415`)
- New documentation: :ref:`cli_convert_complex`. (:issue:`420`)
- More robust detection for whether or not ``deterministic=True`` is supported. (:issue:`425`)
.. _v3_25_1:
3.25.1 (2022-03-11)
-------------------
- Improved display of type information and parameters in the :ref:`API reference documentation `. (:issue:`413`)
.. _v3_25:
3.25 (2022-03-01)
-----------------
- New ``hash_id_columns=`` parameter for creating a primary key that's a hash of the content of specific columns - see :ref:`python_api_hash` for details. (:issue:`343`)
- New :ref:`db.sqlite_version ` property, returning a tuple of integers representing the version of SQLite, for example ``(3, 38, 0)``.
- Fixed a bug where :ref:`register_function(deterministic=True) ` caused errors on versions of SQLite prior to 3.8.3. (:issue:`408`)
- New documented :ref:`hash_record(record, keys=...) ` function.
.. _v3_24:
3.24 (2022-02-15)
-----------------
- SpatiaLite helpers for the ``sqlite-utils`` command-line tool - thanks, Chris Amico. (:issue:`398`)
- :ref:`sqlite-utils create-database ` ``--init-spatialite`` option for initializing SpatiaLite on a newly created database.
- :ref:`sqlite-utils add-geometry-column ` command for adding geometry columns.
- :ref:`sqlite-utils create-spatial-index ` command for adding spatial indexes.
- ``db[table].create(..., if_not_exists=True)`` option for :ref:`creating a table ` only if it does not already exist. (:issue:`397`)
- ``Database(memory_name="my_shared_database")`` parameter for creating a :ref:`named in-memory database ` that can be shared between multiple connections. (:issue:`405`)
- Documentation now describes :ref:`how to add a primary key to a rowid table ` using ``sqlite-utils transform``. (:issue:`403`)
.. _v3_23:
3.23 (2022-02-03)
-----------------
This release introduces four new utility methods for working with `SpatiaLite `__. Thanks, Chris Amico. (`#385 `__)
- ``sqlite_utils.utils.find_spatialite()`` :ref:`finds the location of the SpatiaLite module ` on disk.
- ``db.init_spatialite()`` :ref:`initializes SpatiaLite ` for the given database.
- ``table.add_geometry_column(...)`` :ref:`adds a geometry column ` to an existing table.
- ``table.create_spatial_index(...)`` :ref:`creates a spatial index ` for a column.
- ``sqlite-utils bulk`` now accepts a ``--batch-size`` option. (:issue:`392`)
.. _v3_22_1:
3.22.1 (2022-01-25)
-------------------
- All commands now include example usage in their ``--help`` - see :ref:`cli_reference`. (:issue:`384`)
- Python library documentation has a new :ref:`python_api_getting_started` section. (:issue:`387`)
- Documentation now uses `Plausible analytics `__. (:issue:`389`)
.. _v3_22:
3.22 (2022-01-11)
-----------------
- New :ref:`cli_reference` documentation page, listing the output of ``--help`` for every one of the CLI commands. (:issue:`383`)
- ``sqlite-utils rows`` now has ``--limit`` and ``--offset`` options for paginating through data. (:issue:`381`)
- ``sqlite-utils rows`` now has ``--where`` and ``-p`` options for filtering the table using a ``WHERE`` query, see :ref:`cli_rows`. (:issue:`382`)
.. _v3_21:
3.21 (2022-01-10)
-----------------
CLI and Python library improvements to help run `ANALYZE `__ after creating indexes or inserting rows, to gain better performance from the SQLite query planner when it runs against indexes.
Three new CLI commands: ``create-database``, ``analyze`` and ``bulk``.
More details and examples can be found in `the annotated release notes `__.
- New ``sqlite-utils create-database`` command for creating new empty database files. (:issue:`348`)
- New Python methods for running ``ANALYZE`` against a database, table or index: ``db.analyze()`` and ``table.analyze()``, see :ref:`python_api_analyze`. (:issue:`366`)
- New :ref:`sqlite-utils analyze command ` for running ``ANALYZE`` using the CLI. (:issue:`379`)
- The ``create-index``, ``insert`` and ``upsert`` commands now have a new ``--analyze`` option for running ``ANALYZE`` after the command has completed. (:issue:`379`)
- New :ref:`sqlite-utils bulk command ` which can import records in the same way as ``sqlite-utils insert`` (from JSON, CSV or TSV) and use them to bulk execute a parametrized SQL query. (:issue:`375`)
- The CLI tool can now also be run using ``python -m sqlite_utils``. (:issue:`368`)
- Using ``--fmt`` now implies ``--table``, so you don't need to pass both options. (:issue:`374`)
- The ``--convert`` function applied to rows can now modify the row in place. (:issue:`371`)
- The :ref:`insert-files command ` supports two new columns: ``stem`` and ``suffix``. (:issue:`372`)
- The ``--nl`` import option now ignores blank lines in the input. (:issue:`376`)
- Fixed bug where streaming input to the ``insert`` command with ``--batch-size 1`` would appear to only commit after several rows had been ingested, due to unnecessary input buffering. (:issue:`364`)
.. _v3_20:
3.20 (2022-01-05)
-----------------
- ``sqlite-utils insert ... --lines`` to insert the lines from a file into a table with a single ``line`` column, see :ref:`cli_insert_unstructured`.
- ``sqlite-utils insert ... --text`` to insert the contents of the file into a table with a single ``text`` column and a single row.
- ``sqlite-utils insert ... --convert`` allows a Python function to be provided that will be used to convert each row that is being inserted into the database. See :ref:`cli_insert_convert`, including details on special behavior when combined with ``--lines`` and ``--text``. (:issue:`356`)
- ``sqlite-utils convert`` now accepts a code value of ``-`` to read code from standard input. (:issue:`353`)
- ``sqlite-utils convert`` also now accepts code that defines a named ``convert(value)`` function, see :ref:`cli_convert`.
- ``db.supports_strict`` property showing if the database connection supports `SQLite strict tables `__.
- ``table.strict`` property (see :ref:`python_api_introspection_strict`) indicating if the table uses strict mode. (:issue:`344`)
- Fixed bug where ``sqlite-utils upsert ... --detect-types`` ignored the ``--detect-types`` option. (:issue:`362`)
.. _v3_19:
3.19 (2021-11-20)
-----------------
- The :ref:`table.lookup() method ` now accepts keyword arguments that match those on the underlying ``table.insert()`` method: ``foreign_keys=``, ``column_order=``, ``not_null=``, ``defaults=``, ``extracts=``, ``conversions=`` and ``columns=``. You can also now pass ``pk=`` to specify a different column name to use for the primary key. (:issue:`342`)
.. _v3_18:
3.18 (2021-11-14)
-----------------
- The ``table.lookup()`` method now has an optional second argument which can be used to populate columns only the first time the record is created, see :ref:`python_api_lookup_tables`. (:issue:`339`)
- ``sqlite-utils memory`` now has a ``--flatten`` option for :ref:`flattening nested JSON objects ` into separate columns, consistent with ``sqlite-utils insert``. (:issue:`332`)
- ``table.create_index(..., find_unique_name=True)`` parameter, which finds an available name for the created index even if the default name has already been taken. This means that ``index-foreign-keys`` will work even if one of the indexes it tries to create clashes with an existing index name. (:issue:`335`)
- Added ``py.typed`` to the module, so `mypy `__ should now correctly pick up the type annotations. Thanks, Andreas Longo. (:issue:`331`)
- Now depends on ``python-dateutil`` instead of depending on ``dateutils``. Thanks, Denys Pavlov. (:issue:`324`)
- ``table.create()`` (see :ref:`python_api_explicit_create`) now handles ``dict``, ``list`` and ``tuple`` types, mapping them to ``TEXT`` columns in SQLite so that they can be stored encoded as JSON. (:issue:`338`)
- Inserted data with square braces in the column names (for example a CSV file containing an ``item[price]`` column) now has the braces converted to underscores: ``item_price_``. Previously such columns would be rejected with an error. (:issue:`329`)
- Now also tested against Python 3.10. (`#330 `__)
.. _v3_17.1:
3.17.1 (2021-09-22)
-------------------
- :ref:`sqlite-utils memory ` now works if files passed to it share the same file name. (:issue:`325`)
- :ref:`sqlite-utils query ` now returns ``[]`` in JSON mode if no rows are returned. (:issue:`328`)
.. _v3_17:
3.17 (2021-08-24)
-----------------
- The :ref:`sqlite-utils memory ` command has a new ``--analyze`` option, which runs the equivalent of the :ref:`analyze-tables ` command directly against the in-memory database created from the incoming CSV or JSON data. (:issue:`320`)
- :ref:`sqlite-utils insert-files ` now has the ability to insert file contents into ``TEXT`` columns in addition to the default ``BLOB``. Pass the ``--text`` option or use ``content_text`` as a column specifier. (:issue:`319`)
.. _v3_16:
3.16 (2021-08-18)
-----------------
- Type signatures added to more methods, including ``table.resolve_foreign_keys()``, ``db.create_table_sql()``, ``db.create_table()`` and ``table.create()``. (:issue:`314`)
- New ``db.quote_fts(value)`` method, see :ref:`python_api_quote_fts` - thanks, Mark Neumann. (:issue:`246`)
- ``table.search()`` now accepts an optional ``quote=True`` parameter. (:issue:`296`)
- CLI command ``sqlite-utils search`` now accepts a ``--quote`` option. (:issue:`296`)
- Fixed bug where ``--no-headers`` and ``--tsv`` options to :ref:`sqlite-utils insert ` could not be used together. (:issue:`295`)
- Various small improvements to :ref:`reference` documentation.
.. _v3_15.1:
3.15.1 (2021-08-10)
-------------------
- Python library now includes type annotations on almost all of the methods, plus detailed docstrings describing each one. (:issue:`311`)
- New :ref:`reference` documentation page, powered by those docstrings.
- Fixed bug where ``.add_foreign_keys()`` failed to raise an error if called against a ``View``. (:issue:`313`)
- Fixed bug where ``.delete_where()`` returned a ``[]`` instead of returning ``self`` if called against a non-existent table. (:issue:`315`)
.. _v3_15:
3.15 (2021-08-09)
-----------------
- ``sqlite-utils insert --flatten`` option for :ref:`flattening nested JSON objects ` to create tables with column names like ``topkey_nestedkey``. (:issue:`310`)
- Fixed several spelling mistakes in the documentation, spotted `using codespell `__.
- Errors that occur while using the ``sqlite-utils`` CLI tool now show the responsible SQL and query parameters, if possible. (:issue:`309`)
.. _v3_14:
3.14 (2021-08-02)
-----------------
This release introduces the new :ref:`sqlite-utils convert command ` (:issue:`251`) and corresponding :ref:`table.convert(...) ` Python method (:issue:`302`). These tools can be used to apply a Python conversion function to one or more columns of a table, either updating the column in place or using transformed data from that column to populate one or more other columns.
This command-line example uses the Python standard library `textwrap module `__ to wrap the content of the ``content`` column in the ``articles`` table to 100 characters::
$ sqlite-utils convert content.db articles content \
'"\n".join(textwrap.wrap(value, 100))' \
--import=textwrap
The same operation in Python code looks like this:
.. code-block:: python
import sqlite_utils, textwrap
db = sqlite_utils.Database("content.db")
db["articles"].convert("content", lambda v: "\n".join(textwrap.wrap(v, 100)))
See the full documentation for the :ref:`sqlite-utils convert command ` and the :ref:`table.convert(...) ` Python method for more details.
Also in this release:
- The new ``table.count_where(...)`` method, for counting rows in a table that match a specific SQL ``WHERE`` clause. (:issue:`305`)
- New ``--silent`` option for the :ref:`sqlite-utils insert-files command ` to hide the terminal progress bar, consistent with the ``--silent`` option for ``sqlite-utils convert``. (:issue:`301`)
.. _v3_13:
3.13 (2021-07-24)
-----------------
- ``sqlite-utils schema my.db table1 table2`` command now accepts optional table names. (:issue:`299`)
- ``sqlite-utils memory --help`` now describes the ``--schema`` option.
.. _v3_12:
3.12 (2021-06-25)
-----------------
- New :ref:`db.query(sql, params) ` method, which executes a SQL query and returns the results as an iterator over Python dictionaries. (:issue:`290`)
- This project now uses ``flake8`` and has started to use ``mypy``. (:issue:`291`)
- New documentation on :ref:`contributing ` to this project. (:issue:`292`)
.. _v3_11:
3.11 (2021-06-20)
-----------------
- New ``sqlite-utils memory data.csv --schema`` option, for outputting the schema of the in-memory database generated from one or more files. See :ref:`cli_memory_schema_dump_save`. (:issue:`288`)
- Added :ref:`installation instructions `. (:issue:`286`)
.. _v3_10:
3.10 (2021-06-19)
-----------------
This release introduces the ``sqlite-utils memory`` command, which can be used to load CSV or JSON data into a temporary in-memory database and run SQL queries (including joins across multiple files) directly against that data.
Also new: ``sqlite-utils insert --detect-types``, ``sqlite-utils dump``, ``table.use_rowid`` plus some smaller fixes.
sqlite-utils memory
~~~~~~~~~~~~~~~~~~~
This example of ``sqlite-utils memory`` retrieves information about all of the repositories in the `Dogsheep `__ organization on GitHub using `this JSON API `__, sorts them by their number of stars and outputs a table of the top five (using ``-t``)::
$ curl -s 'https://api.github.com/users/dogsheep/repos' \
| sqlite-utils memory - '
select full_name, forks_count, stargazers_count
from stdin order by stargazers_count desc limit 5
' -t
full_name forks_count stargazers_count
--------------------------------- ------------- ------------------
dogsheep/twitter-to-sqlite 12 225
dogsheep/github-to-sqlite 14 139
dogsheep/dogsheep-photos 5 116
dogsheep/dogsheep.github.io 7 90
dogsheep/healthkit-to-sqlite 4 85
The tool works against files on disk as well. This example joins data from two CSV files::
$ cat creatures.csv
species_id,name
1,Cleo
2,Bants
2,Dori
2,Azi
$ cat species.csv
id,species_name
1,Dog
2,Chicken
$ sqlite-utils memory species.csv creatures.csv '
select * from creatures join species on creatures.species_id = species.id
'
[{"species_id": 1, "name": "Cleo", "id": 1, "species_name": "Dog"},
{"species_id": 2, "name": "Bants", "id": 2, "species_name": "Chicken"},
{"species_id": 2, "name": "Dori", "id": 2, "species_name": "Chicken"},
{"species_id": 2, "name": "Azi", "id": 2, "species_name": "Chicken"}]
Here the ``species.csv`` file becomes the ``species`` table, the ``creatures.csv`` file becomes the ``creatures`` table and the output is JSON, the default output format.
You can also use the ``--attach`` option to attach existing SQLite database files to the in-memory database, in order to join data from CSV or JSON directly against your existing tables.
Full documentation of this new feature is available in :ref:`cli_memory`. (:issue:`272`)
sqlite-utils insert \-\-detect-types
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The :ref:`sqlite-utils insert ` command can be used to insert data from JSON, CSV or TSV files into a SQLite database file. The new ``--detect-types`` option (shortcut ``-d``), when used in conjunction with a CSV or TSV import, will automatically detect if columns in the file are integers or floating point numbers as opposed to treating everything as a text column and create the new table with the corresponding schema. See :ref:`cli_insert_csv_tsv` for details. (:issue:`282`)
Other changes
~~~~~~~~~~~~~
- **Bug fix**: ``table.transform()``, when run against a table without explicit primary keys, would incorrectly create a new version of the table with an explicit primary key column called ``rowid``. (:issue:`284`)
- New ``table.use_rowid`` introspection property, see :ref:`python_api_introspection_use_rowid`. (:issue:`285`)
- The new ``sqlite-utils dump file.db`` command outputs a SQL dump that can be used to recreate a database. (:issue:`274`)
- ``-h`` now works as a shortcut for ``--help``, thanks Loren McIntyre. (:issue:`276`)
- Now using `pytest-cov `__ and `Codecov `__ to track test coverage - currently at 96%. (:issue:`275`)
- SQL errors that occur when using ``sqlite-utils query`` are now displayed as CLI errors.
.. _v3_9_1:
3.9.1 (2021-06-12)
------------------
- Fixed bug when using ``table.upsert_all()`` to create a table with only a single column that is treated as the primary key. (:issue:`271`)
.. _v3_9:
3.9 (2021-06-11)
----------------
- New ``sqlite-utils schema`` command showing the full SQL schema for a database, see :ref:`Showing the schema (CLI)`. (:issue:`268`)
- ``db.schema`` introspection property exposing the same feature to the Python library, see :ref:`Showing the schema (Python library) `.
.. _v3_8:
3.8 (2021-06-02)
----------------
- New ``sqlite-utils indexes`` command to list indexes in a database, see :ref:`cli_indexes`. (:issue:`263`)
- ``table.xindexes`` introspection property returning more details about that table's indexes, see :ref:`python_api_introspection_xindexes`. (:issue:`261`)
.. _v3_7:
3.7 (2021-05-28)
----------------
- New ``table.pks_and_rows_where()`` method returning ``(primary_key, row_dictionary)`` tuples - see :ref:`python_api_pks_and_rows_where`. (:issue:`240`)
- Fixed bug with ``table.add_foreign_key()`` against columns containing spaces. (:issue:`238`)
- ``table_or_view.drop(ignore=True)`` option for avoiding errors if the table or view does not exist. (:issue:`237`)
- ``sqlite-utils drop-view --ignore`` and ``sqlite-utils drop-table --ignore`` options. (:issue:`237`)
- Fixed a bug with inserts of nested JSON containing non-ascii strings - thanks, Dylan Wu. (:issue:`257`)
- Suggest ``--alter`` if an error occurs caused by a missing column. (:issue:`259`)
- Support creating indexes with columns in descending order, see :ref:`API documentation ` and :ref:`CLI documentation `. (:issue:`260`)
- Correctly handle CSV files that start with a UTF-8 BOM. (:issue:`250`)
.. _v3_6:
3.6 (2021-02-18)
----------------
This release adds the ability to execute queries joining data from more than one database file - similar to the cross database querying feature introduced in `Datasette 0.55 `__.
- The ``db.attach(alias, filepath)`` Python method can be used to attach extra databases to the same connection, see :ref:`db.attach() in the Python API documentation `. (:issue:`113`)
- The ``--attach`` option attaches extra aliased databases to run SQL queries against directly on the command-line, see :ref:`attaching additional databases in the CLI documentation `. (:issue:`236`)
.. _v3_5:
3.5 (2021-02-14)
----------------
- ``sqlite-utils insert --sniff`` option for detecting the delimiter and quote character used by a CSV file, see :ref:`cli_insert_csv_tsv_delimiter`. (:issue:`230`)
- The ``table.rows_where()``, ``table.search()`` and ``table.search_sql()`` methods all now take optional ``offset=`` and ``limit=`` arguments. (:issue:`231`)
- New ``--no-headers`` option for ``sqlite-utils insert --csv`` to handle CSV files that are missing the header row, see :ref:`cli_insert_csv_tsv_no_header`. (:issue:`228`)
- Fixed bug where inserting data with extra columns in subsequent chunks would throw an error. Thanks `@nieuwenhoven `__ for the fix. (:issue:`234`)
- Fixed bug importing CSV files with columns containing more than 128KB of data. (:issue:`229`)
- Test suite now runs in CI against Ubuntu, macOS and Windows. Thanks `@nieuwenhoven `__ for the Windows test fixes. (:issue:`232`)
.. _v3_4_1:
3.4.1 (2021-02-05)
------------------
- Fixed a code import bug that slipped into 3.4. (:issue:`226`)
.. _v3_4:
3.4 (2021-02-05)
----------------
- ``sqlite-utils insert --csv`` now accepts optional ``--delimiter`` and ``--quotechar`` options. See :ref:`cli_insert_csv_tsv_delimiter`. (:issue:`223`)
.. _v3_3:
3.3 (2021-01-17)
----------------
- The ``table.m2m()`` method now accepts an optional ``alter=True`` argument to specify that any missing columns should be added to the referenced table. See :ref:`python_api_m2m`. (:issue:`222`)
.. _v3_2_1:
3.2.1 (2021-01-12)
------------------
- Fixed a bug where ``.add_missing_columns()`` failed to take case insensitive column names into account. (:issue:`221`)
.. _v3_2:
3.2 (2021-01-03)
----------------
This release introduces a new mechanism for speeding up ``count(*)`` queries using cached table counts, stored in a ``_counts`` table and updated by triggers. This mechanism is described in :ref:`python_api_cached_table_counts`, and can be enabled using Python API methods or the new ``enable-counts`` CLI command. (:issue:`212`)
- ``table.enable_counts()`` method for enabling these triggers on a specific table.
- ``db.enable_counts()`` method for enabling triggers on every table in the database. (:issue:`213`)
- New ``sqlite-utils enable-counts my.db`` command for enabling counts on all or specific tables, see :ref:`cli_enable_counts`. (:issue:`214`)
- New ``sqlite-utils triggers`` command for listing the triggers defined for a database or specific tables, see :ref:`cli_triggers`. (:issue:`218`)
- New ``db.use_counts_table`` property which, if ``True``, causes ``table.count`` to read from the ``_counts`` table. (:issue:`215`)
- ``table.has_counts_triggers`` property revealing if a table has been configured with the new ``_counts`` database triggers.
- ``db.reset_counts()`` method and ``sqlite-utils reset-counts`` command for resetting the values in the ``_counts`` table. (:issue:`219`)
- The previously undocumented ``db.escape()`` method has been renamed to ``db.quote()`` and is now covered by the documentation: :ref:`python_api_quote`. (:issue:`217`)
- New ``table.triggers_dict`` and ``db.triggers_dict`` introspection properties. (:issue:`211`, :issue:`216`)
- ``sqlite-utils insert`` now shows a more useful error message for invalid JSON. (:issue:`206`)
.. _v3_1_1:
3.1.1 (2021-01-01)
------------------
- Fixed failing test caused by ``optimize`` sometimes creating larger database files. (:issue:`209`)
- Documentation now lives on https://sqlite-utils.datasette.io/
- README now includes ``brew install sqlite-utils`` installation method.
.. _v3_1:
3.1 (2020-12-12)
----------------
- New command: ``sqlite-utils analyze-tables my.db`` outputs useful information about the table columns in the database, such as the number of distinct values and how many rows are null. See :ref:`cli_analyze_tables` for documentation. (:issue:`207`)
- New ``table.analyze_column(column)`` Python method used by the ``analyze-tables`` command - see :ref:`python_api_analyze_column`.
- The ``table.update()`` method now correctly handles values that should be stored as JSON. Thanks, Andreas Madsack. (`#204 `__)
.. _v3_0:
3.0 (2020-11-08)
----------------
This release introduces a new ``sqlite-utils search`` command for searching tables, see :ref:`cli_search`. (:issue:`192`)
The ``table.search()`` method has been redesigned, see :ref:`python_api_fts_search`. (:issue:`197`)
The release includes minor backwards-incompatible changes, hence the version bump to 3.0. Those changes, which should not affect most users, are:
- The ``-c`` shortcut option for outputting CSV is no longer available. The full ``--csv`` option is required instead.
- The ``-f`` shortcut for ``--fmt`` has also been removed - use ``--fmt``.
- The ``table.search()`` method now defaults to sorting by relevance, not sorting by ``rowid``. (:issue:`198`)
- The ``table.search()`` method now returns a generator over a list of Python dictionaries. It previously returned a list of tuples.
Also in this release:
- The ``query``, ``tables``, ``rows`` and ``search`` CLI commands now accept a new ``--tsv`` option which outputs the results in TSV. (:issue:`193`)
- A new ``table.virtual_table_using`` property reveals if a table is a virtual table, and returns the upper case type of virtual table (e.g. ``FTS4`` or ``FTS5``) if it is. It returns ``None`` if the table is not a virtual table. (:issue:`196`)
- The new ``table.search_sql()`` method returns the SQL for searching a table, see :ref:`python_api_fts_search_sql`.
- ``sqlite-utils rows`` now accepts multiple optional ``-c`` parameters specifying the columns to return. (:issue:`200`)
Changes since the 3.0a0 alpha release:
- The ``sqlite-utils search`` command now defaults to returning every result, unless you add a ``--limit 20`` option.
- The ``sqlite-utils search -c`` and ``table.search(columns=[])`` options are now fully respected. (:issue:`201`)
.. _v2_23:
2.23 (2020-10-28)
-----------------
- ``table.m2m(other_table, records)`` method now takes any iterable, not just a list or tuple. Thanks, Adam Wolf. (`#189 `__)
- ``sqlite-utils insert`` now displays a progress bar for CSV or TSV imports. (:issue:`173`)
- New ``@db.register_function(deterministic=True)`` option for registering deterministic SQLite functions in Python 3.8 or higher. (:issue:`191`)
.. _v2_22:
2.22 (2020-10-16)
-----------------
- New ``--encoding`` option for processing CSV and TSV files that use a non-utf-8 encoding, for both the ``insert`` and ``update`` commands. (:issue:`182`)
- The ``--load-extension`` option is now available to many more commands. (:issue:`137`)
- ``--load-extension=spatialite`` can be used to load SpatiaLite from common installation locations, if it is available. (:issue:`136`)
- Tests now also run against Python 3.9. (:issue:`184`)
- Passing ``pk=["id"]`` now has the same effect as passing ``pk="id"``. (:issue:`181`)
.. _v2_21:
2.21 (2020-09-24)
-----------------
- ``table.extract()`` and ``sqlite-utils extract`` now apply much, much faster - one example operation reduced from twelve minutes to just four seconds! (:issue:`172`)
- ``sqlite-utils extract`` no longer shows a progress bar, because it's fast enough not to need one.
- New ``column_order=`` option for ``table.transform()`` which can be used to alter the order of columns in a table. (:issue:`175`)
- ``sqlite-utils transform --column-order=`` option (with a ``-o`` shortcut) for changing column order. (:issue:`176`)
- The ``table.transform(drop_foreign_keys=)`` parameter and the ``sqlite-utils transform --drop-foreign-key`` option have changed. They now accept just the name of the column rather than requiring all three of the column, other table and other column. This is technically a backwards-incompatible change but I chose not to bump the major version number because the transform feature is so new. (:issue:`177`)
- The table ``.disable_fts()``, ``.rebuild_fts()``, ``.delete()``, ``.delete_where()`` and ``.add_missing_columns()`` methods all now ``return self``, which means they can be chained together with other table operations.
.. _v2_20:
2.20 (2020-09-22)
-----------------
This release introduces two key new capabilities: **transform** (:issue:`114`) and **extract** (:issue:`42`).
Transform
~~~~~~~~~
SQLite's ALTER TABLE has `several documented limitations `__. The ``table.transform()`` Python method and ``sqlite-utils transform`` CLI command work around these limitations using a pattern where a new table with the desired structure is created, data is copied over to it and the old table is then dropped and replaced by the new one.
You can use these tools to change column types, rename columns, drop columns, add and remove ``NOT NULL`` and defaults, remove foreign key constraints and more. See the :ref:`transforming tables (CLI) ` and :ref:`transforming tables (Python library) ` documentation for full details of how to use them.
Extract
~~~~~~~
Sometimes a database table - especially one imported from a CSV file - will contain duplicate data. A ``Trees`` table may include a ``Species`` column with only a few dozen unique values, when the table itself contains thousands of rows.
The ``table.extract()`` method and ``sqlite-utils extract`` commands can extract a column - or multiple columns - out into a separate lookup table, and set up a foreign key relationship from the original table.
The Python library :ref:`extract() documentation ` describes how extraction works in detail, and :ref:`cli_extract` in the CLI documentation includes a detailed example.
Other changes
~~~~~~~~~~~~~
- The ``@db.register_function`` decorator can be used to quickly register Python functions as custom SQL functions, see :ref:`python_api_register_function`. (:issue:`162`)
- The ``table.rows_where()`` method now accepts an optional ``select=`` argument for specifying which columns should be selected, see :ref:`python_api_rows`.
.. _v2_19:
2.19 (2020-09-20)
-----------------
- New ``sqlite-utils add-foreign-keys`` command for :ref:`cli_add_foreign_keys`. (:issue:`157`)
- New ``table.enable_fts(..., replace=True)`` argument for replacing an existing FTS table with a new configuration. (:issue:`160`)
- New ``table.add_foreign_key(..., ignore=True)`` argument for ignoring a foreign key if it already exists. (:issue:`112`)
.. _v2_18:
2.18 (2020-09-08)
-----------------
- ``table.rebuild_fts()`` method for rebuilding a FTS index, see :ref:`python_api_fts_rebuild`. (:issue:`155`)
- ``sqlite-utils rebuild-fts data.db`` command for rebuilding FTS indexes across all tables, or just specific tables. (:issue:`155`)
- ``table.optimize()`` method no longer deletes junk rows from the ``*_fts_docsize`` table. This was added in 2.17 but it turns out running ``table.rebuild_fts()`` is a better solution to this problem.
- Fixed a bug where rows with additional columns that are inserted after the first batch of records could cause an error due to breaking SQLite's maximum number of parameters. Thanks, Simon Wiles. (:issue:`145`)
.. _v2_17:
2.17 (2020-09-07)
-----------------
This release handles a bug where replacing rows in FTS tables could result in growing numbers of unnecessary rows in the associated ``*_fts_docsize`` table. (:issue:`149`)
- ``PRAGMA recursive_triggers=on`` by default for all connections. You can turn it off with ``Database(recursive_triggers=False)``. (:issue:`152`)
- ``table.optimize()`` method now deletes unnecessary rows from the ``*_fts_docsize`` table. (:issue:`153`)
- New tracer method for tracking underlying SQL queries, see :ref:`python_api_tracing`. (:issue:`150`)
- Neater indentation for schema SQL. (:issue:`148`)
- Documentation for the ``sqlite_utils.AlterError`` exception thrown by ``add_foreign_keys()``.
.. _v2_16_1:
2.16.1 (2020-08-28)
-------------------
- ``insert_all(..., alter=True)`` now works for columns introduced after the first 100 records. Thanks, Simon Wiles! (:issue:`139`)
- Continuous Integration is now powered by GitHub Actions. (:issue:`143`)
.. _v2_16:
2.16 (2020-08-21)
-----------------
- ``--load-extension`` option for ``sqlite-utils query`` for loading SQLite extensions. (:issue:`134`)
- New ``sqlite_utils.utils.find_spatialite()`` function for finding SpatiaLite in common locations. (:issue:`135`)
.. _v2_15_1:
2.15.1 (2020-08-12)
-------------------
- Now available as a ``sdist`` package on PyPI in addition to a wheel. (:issue:`133`)
.. _v2_15:
2.15 (2020-08-10)
-----------------
- New ``db.enable_wal()`` and ``db.disable_wal()`` methods for enabling and disabling `Write-Ahead Logging `__ for a database file - see :ref:`python_api_wal` in the Python API documentation.
- Also ``sqlite-utils enable-wal file.db`` and ``sqlite-utils disable-wal file.db`` commands for doing the same thing on the command-line, see :ref:`WAL mode (CLI) `. (:issue:`132`)
.. _v2_14_1:
2.14.1 (2020-08-05)
-------------------
- Documentation improvements.
.. _v2_14:
2.14 (2020-08-01)
-----------------
- The :ref:`insert-files command ` can now read from standard input: ``cat dog.jpg | sqlite-utils insert-files dogs.db pics - --name=dog.jpg``. (:issue:`127`)
- You can now specify a full-text search tokenizer using the new ``tokenize=`` parameter to :ref:`enable_fts() `. This means you can enable Porter stemming on a table by running ``db["articles"].enable_fts(["headline", "body"], tokenize="porter")``. (:issue:`130`)
- You can also set a custom tokenizer using the :ref:`sqlite-utils enable-fts ` CLI command, via the new ``--tokenize`` option.
.. _v2_13:
2.13 (2020-07-29)
-----------------
- ``memoryview`` and ``uuid.UUID`` objects are now supported. ``memoryview`` objects will be stored using ``BLOB`` and ``uuid.UUID`` objects will be stored using ``TEXT``. (:issue:`128`)
.. _v2_12:
2.12 (2020-07-27)
-----------------
The theme of this release is better tools for working with binary data. The new ``insert-files`` command can be used to insert binary files directly into a database table, and other commands have been improved with better support for BLOB columns.
- ``sqlite-utils insert-files my.db gifs *.gif`` can now insert the contents of files into a specified table. The columns in the table can be customized to include different pieces of metadata derived from the files. See :ref:`cli_insert_files`. (:issue:`122`)
- ``--raw`` option to ``sqlite-utils query`` - for outputting just a single raw column value - see :ref:`cli_query_raw`. (:issue:`123`)
- JSON output now encodes BLOB values as special base64 objects - see :ref:`cli_query_json`. (:issue:`125`)
- The same format of JSON base64 objects can now be used to insert binary data - see :ref:`cli_inserting_data`. (:issue:`126`)
- The ``sqlite-utils query`` command can now accept named parameters, e.g. ``sqlite-utils :memory: "select :num * :num2" -p num 5 -p num2 6`` - see :ref:`cli_query_json`. (:issue:`124`)
.. _v2_11:
2.11 (2020-07-08)
-----------------
- New ``--truncate`` option to ``sqlite-utils insert``, and ``truncate=True`` argument to ``.insert_all()``. Thanks, Thomas Sibley. (`#118 `__)
- The ``sqlite-utils query`` command now runs updates in a transaction. Thanks, Thomas Sibley. (`#120 `__)
.. _v2_10_1:
2.10.1 (2020-06-23)
-------------------
- Added documentation for the ``table.pks`` introspection property. (:issue:`116`)
.. _v2_10:
2.10 (2020-06-12)
-----------------
- The ``sqlite-utils`` command now supports UPDATE/INSERT/DELETE in addition to SELECT. (:issue:`115`)
.. _v2_9_1:
2.9.1 (2020-05-11)
------------------
- Added custom project links to the `PyPI listing `__.
.. _v2_9:
2.9 (2020-05-10)
----------------
- New ``sqlite-utils drop-table`` command, see :ref:`cli_drop_table`. (:issue:`111`)
- New ``sqlite-utils drop-view`` command, see :ref:`cli_drop_view`.
- Python ``decimal.Decimal`` objects are now stored as ``FLOAT``. (:issue:`110`)
.. _v2_8:
2.8 (2020-05-03)
----------------
- New ``sqlite-utils create-table`` command, see :ref:`cli_create_table`. (:issue:`27`)
- New ``sqlite-utils create-view`` command, see :ref:`cli_create_view`. (:issue:`107`)
.. _v2_7.2:
2.7.2 (2020-05-02)
------------------
- ``db.create_view(...)`` now has additional parameters ``ignore=True`` or ``replace=True``, see :ref:`python_api_create_view`. (:issue:`106`)
.. _v2_7.1:
2.7.1 (2020-05-01)
------------------
- New ``sqlite-utils views my.db`` command for listing views in a database, see :ref:`cli_views`. (:issue:`105`)
- ``sqlite-utils tables`` (and ``views``) has a new ``--schema`` option which outputs the table/view schema, see :ref:`cli_tables`. (:issue:`104`)
- Nested structures containing invalid JSON values (e.g. Python bytestrings) are now serialized using ``repr()`` instead of throwing an error. (:issue:`102`)
.. _v2_7:
2.7 (2020-04-17)
----------------
- New ``columns=`` argument for the ``.insert()``, ``.insert_all()``, ``.upsert()`` and ``.upsert_all()`` methods, for over-riding the auto-detected types for columns and specifying additional columns that should be added when the table is created. See :ref:`python_api_custom_columns`. (:issue:`100`)
.. _v2_6:
2.6 (2020-04-15)
----------------
- New ``table.rows_where(..., order_by="age desc")`` argument, see :ref:`python_api_rows`. (:issue:`76`)
.. _v2_5:
2.5 (2020-04-12)
----------------
- Pandas Timestamp is now stored as a SQLite TEXT column. Thanks, b0b5h4rp13! (:issue:`96`)
- ``table.last_pk`` is now only available for inserts or upserts of a single record. (:issue:`98`)
- New ``Database(filepath, recreate=True)`` parameter for deleting and recreating the database. (:issue:`97`)
.. _v2_4_4:
2.4.4 (2020-03-23)
------------------
- Fixed bug where columns with only null values were not correctly created. (:issue:`95`)
.. _v2_4_3:
2.4.3 (2020-03-23)
------------------
- Column type suggestion code is no longer confused by null values. (:issue:`94`)
.. _v2_4_2:
2.4.2 (2020-03-14)
------------------
- ``table.column_dicts`` now works with all column types - previously it would throw errors on types other than ``TEXT``, ``BLOB``, ``INTEGER`` or ``FLOAT``. (:issue:`92`)
- Documentation for ``NotFoundError`` thrown by ``table.get(pk)`` - see :ref:`python_api_get`.
.. _v2_4_1:
2.4.1 (2020-03-01)
------------------
- ``table.enable_fts()`` now works with columns that contain spaces. (:issue:`90`)
.. _v2_4:
2.4 (2020-02-26)
----------------
- ``table.disable_fts()`` can now be used to remove FTS tables and triggers that were created using ``table.enable_fts(...)``. (:issue:`88`)
- The ``sqlite-utils disable-fts`` command can be used to remove FTS tables and triggers from the command-line. (:issue:`88`)
- Trying to create table columns with square braces ([ or ]) in the name now raises an error. (:issue:`86`)
- Subclasses of ``dict``, ``list`` and ``tuple`` are now detected as needing a JSON column. (:issue:`87`)
.. _v2_3_1:
2.3.1 (2020-02-10)
------------------
``table.create_index()`` now works for columns that contain spaces. (:issue:`85`)
.. _v2_3:
2.3 (2020-02-08)
----------------
``table.exists()`` is now a method, not a property. This was not a documented part of the API before so I'm considering this a non-breaking change. (:issue:`83`)
.. _v2_2_1:
2.2.1 (2020-02-06)
------------------
Fixed a bug where ``.upsert(..., hash_id="pk")`` threw an error (:issue:`84`).
.. _v2_2:
2.2 (2020-02-01)
----------------
New feature: ``sqlite_utils.suggest_column_types([records])`` returns the suggested column types for a list of records. See :ref:`python_api_suggest_column_types`. (:issue:`81`).
This replaces the undocumented ``table.detect_column_types()`` method.
.. _v2_1:
2.1 (2020-01-30)
----------------
New feature: ``conversions={...}`` can be passed to the ``.insert()`` family of functions to specify SQL conversions that should be applied to values that are being inserted or updated. See :ref:`python_api_conversions`. (`#77 `__).
.. _v2_0_1:
2.0.1 (2020-01-05)
------------------
The ``.upsert()`` and ``.upsert_all()`` methods now raise a ``sqlite_utils.db.PrimaryKeyRequired`` exception if you call them without specifying the primary key column using ``pk=`` (:issue:`73`).
.. _v2:
2.0 (2019-12-29)
----------------
This release changes the behaviour of ``upsert``. It's a breaking change, hence ``2.0``.
The ``upsert`` command-line utility and the ``.upsert()`` and ``.upsert_all()`` Python API methods have had their behaviour altered. They used to completely replace the affected records: now, they update the specified values on existing records but leave other columns unaffected.
See :ref:`Upserting data using the Python API ` and :ref:`Upserting data using the CLI ` for full details.
If you want the old behaviour - where records were completely replaced - you can use ``$ sqlite-utils insert ... --replace`` on the command-line and ``.insert(..., replace=True)`` and ``.insert_all(..., replace=True)`` in the Python API. See :ref:`Insert-replacing data using the Python API ` and :ref:`Insert-replacing data using the CLI ` for more.
For full background on this change, see `issue #66 `__.
.. _v1_12_1:
1.12.1 (2019-11-06)
-------------------
- Fixed error thrown when ``.insert_all()`` and ``.upsert_all()`` were called with empty lists (:issue:`52`)
.. _v1_12:
1.12 (2019-11-04)
-----------------
Python library utilities for deleting records (:issue:`62`)
- ``db["tablename"].delete(4)`` to delete by primary key, see :ref:`python_api_delete`
- ``db["tablename"].delete_where("id > ?", [3])`` to delete by a where clause, see :ref:`python_api_delete_where`
.. _v1_11:
1.11 (2019-09-02)
-----------------
Option to create triggers to automatically keep FTS tables up-to-date with newly inserted, updated and deleted records. Thanks, Amjith Ramanujam! (`#57 `__)
- ``sqlite-utils enable-fts ... --create-triggers`` - see :ref:`Configuring full-text search using the CLI `
- ``db["tablename"].enable_fts(..., create_triggers=True)`` - see :ref:`Configuring full-text search using the Python library `
- Support for introspecting triggers for a database or table - see :ref:`python_api_introspection` (:issue:`59`)
.. _v1_10:
1.10 (2019-08-23)
-----------------
Ability to introspect and run queries against views (:issue:`54`)
- ``db.view_names()`` method and ``db.views`` property
- Separate ``View`` and ``Table`` classes, both subclassing new ``Queryable`` class
- ``view.drop()`` method
See :ref:`python_api_views`.
.. _v1_9:
1.9 (2019-08-04)
----------------
- ``table.m2m(...)`` method for creating many-to-many relationships: :ref:`python_api_m2m` (:issue:`23`)
.. _v1_8:
1.8 (2019-07-28)
----------------
- ``table.update(pk, values)`` method: :ref:`python_api_update` (:issue:`35`)
.. _v1_7_1:
1.7.1 (2019-07-28)
------------------
- Fixed bug where inserting records with 11 columns in a batch of 100 triggered a "too many SQL variables" error (:issue:`50`)
- Documentation and tests for ``table.drop()`` method: :ref:`python_api_drop`
.. _v1_7:
1.7 (2019-07-24)
----------------
Support for lookup tables.
- New ``table.lookup({...})`` utility method for building and querying lookup tables - see :ref:`python_api_lookup_tables` (:issue:`44`)
- New ``extracts=`` table configuration option, see :ref:`python_api_extracts` (:issue:`46`)
- Use `pysqlite3 `__ if it is available, otherwise use ``sqlite3`` from the standard library
- Table options can now be passed to the new ``db.table(name, **options)`` factory function in addition to being passed to ``insert_all(records, **options)`` and friends - see :ref:`python_api_table_configuration`
- In-memory databases can now be created using ``db = Database(memory=True)``
.. _v1_6:
1.6 (2019-07-18)
----------------
- ``sqlite-utils insert`` can now accept TSV data via the new ``--tsv`` option (:issue:`41`)
.. _v1_5:
1.5 (2019-07-14)
----------------
- Support for compound primary keys (:issue:`36`)
- Configure these using the CLI tool by passing ``--pk`` multiple times
- In Python, pass a tuple of columns to the ``pk=(..., ...)`` argument: :ref:`python_api_compound_primary_keys`
- New ``table.get()`` method for retrieving a record by its primary key: :ref:`python_api_get` (:issue:`39`)
.. _v1_4_1:
1.4.1 (2019-07-14)
------------------
- Assorted minor documentation fixes: `changes since 1.4 `__
.. _v1_4:
1.4 (2019-06-30)
----------------
- Added ``sqlite-utils index-foreign-keys`` command (:ref:`docs `) and ``db.index_foreign_keys()`` method (:ref:`docs `) (:issue:`33`)
.. _v1_3:
1.3 (2019-06-28)
----------------
- New mechanism for adding multiple foreign key constraints at once: :ref:`db.add_foreign_keys() documentation ` (:issue:`31`)
.. _v1_2_2:
1.2.2 (2019-06-25)
------------------
- Fixed bug where ``datetime.time`` was not being handled correctly
.. _v1_2_1:
1.2.1 (2019-06-20)
------------------
- Check the column exists before attempting to add a foreign key (:issue:`29`)
.. _v1_2:
1.2 (2019-06-12)
----------------
- Improved foreign key definitions: you no longer need to specify the ``column``, ``other_table`` AND ``other_column`` to define a foreign key - if you omit the ``other_table`` or ``other_column`` the script will attempt to guess the correct values by introspecting the database. See :ref:`python_api_add_foreign_key` for details. (:issue:`25`)
- Ability to set ``NOT NULL`` constraints and ``DEFAULT`` values when creating tables (:issue:`24`). Documentation: :ref:`Setting defaults and not null constraints (Python API) `, :ref:`Setting defaults and not null constraints (CLI) `
- Support for ``not_null_default=X`` / ``--not-null-default`` for setting a ``NOT NULL DEFAULT 'x'`` when adding a new column. Documentation: :ref:`Adding columns (Python API) `, :ref:`Adding columns (CLI) `
.. _v1_1:
1.1 (2019-05-28)
----------------
- Support for ``ignore=True`` / ``--ignore`` for ignoring inserted records if the primary key already exists (:issue:`21`) - documentation: :ref:`Inserting data (Python API) `, :ref:`Inserting data (CLI) `
- Ability to add a column that is a foreign key reference using ``fk=...`` / ``--fk`` (:issue:`16`) - documentation: :ref:`Adding columns (Python API) `, :ref:`Adding columns (CLI) `
.. _v1_0_1:
1.0.1 (2019-05-27)
------------------
- ``sqlite-utils rows data.db table --json-cols`` - fixed bug where ``--json-cols`` was not obeyed
.. _v1_0:
1.0 (2019-05-24)
----------------
- Option to automatically add new columns if you attempt to insert or upsert data with extra fields:
``sqlite-utils insert ... --alter`` - see :ref:`Adding columns automatically with the sqlite-utils CLI `
``db["tablename"].insert(record, alter=True)`` - see :ref:`Adding columns automatically using the Python API `
- New ``--json-cols`` option for outputting nested JSON, see :ref:`cli_json_values`
.. _v0_14:
0.14 (2019-02-24)
-----------------
- Ability to create unique indexes: ``db["mytable"].create_index(["name"], unique=True)``
- ``db["mytable"].create_index(["name"], if_not_exists=True)``
- ``$ sqlite-utils create-index mydb.db mytable col1 [col2...]``, see :ref:`cli_create_index`
- ``table.add_column(name, type)`` method, see :ref:`python_api_add_column`
- ``$ sqlite-utils add-column mydb.db mytable nameofcolumn``, see :ref:`cli_add_column` (CLI)
- ``db["books"].add_foreign_key("author_id", "authors", "id")``, see :ref:`python_api_add_foreign_key`
- ``$ sqlite-utils add-foreign-key books.db books author_id authors id``, see :ref:`cli_add_foreign_key` (CLI)
- Improved (but backwards-incompatible) ``foreign_keys=`` argument to various methods, see :ref:`python_api_foreign_keys`
.. _v0_13:
0.13 (2019-02-23)
-----------------
- New ``--table`` and ``--fmt`` options can be used to output query results in a variety of visual table formats, see :ref:`cli_query_table`
- New ``hash_id=`` argument can now be used for :ref:`python_api_hash`
- Can now derive correct column types for numpy int, uint and float values
- ``table.last_id`` has been renamed to ``table.last_rowid``
- ``table.last_pk`` now contains the last inserted primary key, if ``pk=`` was specified
- Prettier indentation in the ``CREATE TABLE`` generated schemas
.. _v0_12:
0.12 (2019-02-22)
-----------------
- Added ``db[table].rows`` iterator - see :ref:`python_api_rows`
- Replaced ``sqlite-utils json`` and ``sqlite-utils csv`` with a new default subcommand called ``sqlite-utils query`` which defaults to JSON and takes formatting options ``--nl``, ``--csv`` and ``--no-headers`` - see :ref:`cli_query_json` and :ref:`cli_query_csv`
- New ``sqlite-utils rows data.db name-of-table`` command, see :ref:`cli_rows`
- ``sqlite-utils tables`` command now takes options ``--counts`` and ``--columns`` plus the standard output format options, see :ref:`cli_tables`
.. _v0_11:
0.11 (2019-02-07)
-----------------
New commands for enabling FTS against a table and columns::
sqlite-utils enable-fts db.db mytable col1 col2
See :ref:`cli_fts`.
.. _v0_10:
0.10 (2019-02-06)
-----------------
Handle ``datetime.date`` and ``datetime.time`` values.
New option for efficiently inserting rows from a CSV:
::
sqlite-utils insert db.db foo - --csv
.. _v0_9:
0.9 (2019-01-27)
----------------
Improved support for newline-delimited JSON.
``sqlite-utils insert`` has two new command-line options:
* ``--nl`` means "expect newline-delimited JSON". This is an extremely efficient way of loading in large amounts of data, especially if you pipe it into standard input.
* ``--batch-size=1000`` lets you increase the batch size (default is 100). A commit will be issued every X records. This also controls how many initial records are considered when detecting the desired SQL table schema for the data.
In the Python API, the ``table.insert_all(...)`` method can now accept a generator as well as a list of objects. This will be efficiently used to populate the table no matter how many records are produced by the generator.
The ``Database()`` constructor can now accept a ``pathlib.Path`` object in addition to a string or an existing SQLite connection object.
.. _v0_8:
0.8 (2019-01-25)
----------------
Two new commands: ``sqlite-utils csv`` and ``sqlite-utils json``
These commands execute a SQL query and return the results as CSV or JSON. See :ref:`cli_query_csv` and :ref:`cli_query_json` for more details.
::
$ sqlite-utils json --help
Usage: sqlite-utils json [OPTIONS] PATH SQL
Execute SQL query and return the results as JSON
Options:
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--help Show this message and exit.
$ sqlite-utils csv --help
Usage: sqlite-utils csv [OPTIONS] PATH SQL
Execute SQL query and return the results as CSV
Options:
--no-headers Exclude headers from CSV output
--help Show this message and exit.
.. _v0_7:
0.7 (2019-01-24)
----------------
This release implements the ``sqlite-utils`` command-line tool with a number of useful subcommands.
- ``sqlite-utils tables demo.db`` lists the tables in the database
- ``sqlite-utils tables demo.db --fts4`` shows just the FTS4 tables
- ``sqlite-utils tables demo.db --fts5`` shows just the FTS5 tables
- ``sqlite-utils vacuum demo.db`` runs VACUUM against the database
- ``sqlite-utils optimize demo.db`` runs OPTIMIZE against all FTS tables, then VACUUM
- ``sqlite-utils optimize demo.db --no-vacuum`` runs OPTIMIZE but skips VACUUM
The two most useful subcommands are ``upsert`` and ``insert``, which allow you to ingest JSON files with one or more records in them, creating the corresponding table with the correct columns if it does not already exist. See :ref:`cli_inserting_data` for more details.
- ``sqlite-utils insert demo.db dogs dogs.json --pk=id`` inserts new records from ``dogs.json`` into the ``dogs`` table
- ``sqlite-utils upsert demo.db dogs dogs.json --pk=id`` upserts records, replacing any records with duplicate primary keys
One backwards incompatible change: the ``db.table_names`` property is now a method:
- ``db.table_names()`` returns a list of table names
- ``db.table_names(fts4=True)`` returns a list of just the FTS4 tables
- ``db.table_names(fts5=True)`` returns a list of just the FTS5 tables
A few other changes:
- Plenty of updated documentation, including full coverage of the new command-line tool
- Allow column names to be reserved words (use correct SQL escaping)
- Added automatic column support for bytes and datetime.datetime
.. _v0_6:
0.6 (2018-08-12)
----------------
- ``.enable_fts()`` now takes optional argument ``fts_version``, defaults to ``FTS5``. Use ``FTS4`` if the version of SQLite bundled with your Python does not support FTS5
- New optional ``column_order=`` argument to ``.insert()`` and friends for providing a partial or full desired order of the columns when a database table is created
- :ref:`New documentation <python_api_bulk_inserts>` for ``.insert_all()`` and ``.upsert()`` and ``.upsert_all()``
.. _v0_5:
0.5 (2018-08-05)
----------------
- ``db.tables`` and ``db.table_names`` introspection properties
- ``db.indexes`` property for introspecting indexes
- ``table.create_index(columns, index_name)`` method
- ``db.create_view(name, sql)`` method
- Table methods can now be chained, plus added ``table.last_id`` for accessing the last inserted row ID
0.4 (2018-07-31)
----------------
- ``enable_fts()``, ``populate_fts()`` and ``search()`` table methods
0.3.1 (2018-07-31)
------------------
- Documented related projects
- Added badges to the documentation
0.3 (2018-07-31)
----------------
- New ``Table`` class representing a table in the SQLite database
0.2 (2018-07-28)
----------------
- Initial release to PyPI
sqlite-utils-4.0a0/docs/cli-reference.rst 0000664 0000000 0000000 00000141202 15007276064 0020431 0 ustar 00root root 0000000 0000000 .. _cli_reference:
===============
CLI reference
===============
This page lists the ``--help`` for every ``sqlite-utils`` CLI sub-command.
.. contents:: :local:
:class: this-will-duplicate-information-and-it-is-still-useful-here
.. [[[cog
from sqlite_utils import cli
import sys
sys._called_from_test = True
from click.testing import CliRunner
import textwrap
commands = list(cli.cli.commands.keys())
go_first = [
"query", "memory", "insert", "upsert", "bulk", "search", "transform", "extract",
"schema", "insert-files", "analyze-tables", "convert", "tables", "views", "rows",
"triggers", "indexes", "create-database", "create-table", "create-index",
"enable-fts", "populate-fts", "rebuild-fts", "disable-fts"
]
refs = {
"query": "cli_query",
"memory": "cli_memory",
"insert": [
"cli_inserting_data", "cli_insert_csv_tsv", "cli_insert_unstructured", "cli_insert_convert"
],
"upsert": "cli_upsert",
"tables": "cli_tables",
"views": "cli_views",
"optimize": "cli_optimize",
"rows": "cli_rows",
"triggers": "cli_triggers",
"indexes": "cli_indexes",
"enable-fts": "cli_fts",
"analyze": "cli_analyze",
"vacuum": "cli_vacuum",
"dump": "cli_dump",
"add-column": "cli_add_column",
"rename-table": "cli_renaming_tables",
"duplicate": "cli_duplicate_table",
"add-foreign-key": "cli_add_foreign_key",
"add-foreign-keys": "cli_add_foreign_keys",
"index-foreign-keys": "cli_index_foreign_keys",
"create-index": "cli_create_index",
"enable-wal": "cli_wal",
"enable-counts": "cli_enable_counts",
"bulk": "cli_bulk",
"create-database": "cli_create_database",
"create-table": "cli_create_table",
"drop-table": "cli_drop_table",
"create-view": "cli_create_view",
"drop-view": "cli_drop_view",
"search": "cli_search",
"transform": "cli_transform_table",
"extract": "cli_extract",
"schema": "cli_schema",
"insert-files": "cli_insert_files",
"analyze-tables": "cli_analyze_tables",
"convert": "cli_convert",
"add-geometry-column": "cli_spatialite",
"create-spatial-index": "cli_spatialite_indexes",
"install": "cli_install",
"uninstall": "cli_uninstall",
}
commands.sort(key = lambda command: go_first.index(command) if command in go_first else 999)
cog.out("\n")
for command in commands:
cog.out(".. _cli_ref_" + command.replace("-", "_") + ":\n\n")
cog.out(command + "\n")
cog.out(("=" * len(command)) + "\n\n")
if command in refs:
command_refs = refs[command]
if isinstance(command_refs, str):
command_refs = [command_refs]
cog.out(
"See {}.\n\n".format(
", ".join(":ref:`{}`".format(c) for c in command_refs)
)
)
cog.out("::\n\n")
result = CliRunner().invoke(cli.cli, [command, "--help"])
output = result.output.replace("Usage: cli ", "Usage: sqlite-utils ")
output = output.replace('\b', '')
cog.out(textwrap.indent(output, ' '))
cog.out("\n\n")
.. ]]]
.. _cli_ref_query:
query
=====
See :ref:`cli_query`.
::
Usage: sqlite-utils query [OPTIONS] PATH SQL
Execute SQL query and return the results as JSON
Example:
sqlite-utils data.db \
"select * from chickens where age > :age" \
-p age 1
Options:
--attach <TEXT FILE>... Additional databases to attach - specify alias and
filepath
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira,
latex, latex_booktabs, latex_longtable, latex_raw,
mediawiki, mixed_grid, mixed_outline, moinmoin,
orgtbl, outline, pipe, plain, presto, pretty,
psql, rounded_grid, rounded_outline, rst, simple,
simple_grid, simple_outline, textile, tsv,
unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not
escaped strings
-r, --raw Raw output, first column of first row
--raw-lines Raw output, first column of each row
-p, --param <TEXT TEXT>... Named :parameters for SQL query
--functions TEXT Python code defining one or more custom SQL
functions
--load-extension TEXT Path to SQLite extension, with optional
:entrypoint
-h, --help Show this message and exit.
.. _cli_ref_memory:
memory
======
See :ref:`cli_memory`.
::
Usage: sqlite-utils memory [OPTIONS] [PATHS]... SQL
Execute SQL query against an in-memory database, optionally populated by
imported data
To import data from CSV, TSV or JSON files pass them on the command-line:
sqlite-utils memory one.csv two.json \
"select * from one join two on one.two_id = two.id"
For data piped into the tool from standard input, use "-" or "stdin":
cat animals.csv | sqlite-utils memory - \
"select * from stdin where species = 'dog'"
The format of the data will be automatically detected. You can specify the
format explicitly using :json, :csv, :tsv or :nl (for newline-delimited JSON)
- for example:
cat animals.csv | sqlite-utils memory stdin:csv places.dat:nl \
"select * from stdin where place_id in (select id from places)"
Use --schema to view the SQL schema of any imported files:
sqlite-utils memory animals.csv --schema
Options:
--functions TEXT Python code defining one or more custom SQL
functions
--attach <TEXT FILE>... Additional databases to attach - specify alias and
filepath
--flatten Flatten nested JSON objects, so {"foo": {"bar":
1}} becomes {"foo_bar": 1}
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira,
latex, latex_booktabs, latex_longtable, latex_raw,
mediawiki, mixed_grid, mixed_outline, moinmoin,
orgtbl, outline, pipe, plain, presto, pretty,
psql, rounded_grid, rounded_outline, rst, simple,
simple_grid, simple_outline, textile, tsv,
unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not
escaped strings
-r, --raw Raw output, first column of first row
--raw-lines Raw output, first column of each row
-p, --param <TEXT TEXT>... Named :parameters for SQL query
--encoding TEXT Character encoding for CSV input, defaults to
utf-8
-n, --no-detect-types Treat all CSV/TSV columns as TEXT
--schema Show SQL schema for in-memory database
--dump Dump SQL for in-memory database
--save FILE Save in-memory database to this file
--analyze Analyze resulting tables and output results
--load-extension TEXT Path to SQLite extension, with optional
:entrypoint
-h, --help Show this message and exit.
.. _cli_ref_insert:
insert
======
See :ref:`cli_inserting_data`, :ref:`cli_insert_csv_tsv`, :ref:`cli_insert_unstructured`, :ref:`cli_insert_convert`.
::
Usage: sqlite-utils insert [OPTIONS] PATH TABLE FILE
Insert records from FILE into a table, creating the table if it does not
already exist.
Example:
echo '{"name": "Lila"}' | sqlite-utils insert data.db chickens -
By default the input is expected to be a JSON object or array of objects.
- Use --nl for newline-delimited JSON objects
- Use --csv or --tsv for comma-separated or tab-separated input
- Use --lines to write each incoming line to a column called "line"
- Use --text to write the entire input to a column called "text"
You can also use --convert to pass a fragment of Python code that will be used
to convert each input.
Your Python code will be passed a "row" variable representing the imported
row, and can return a modified row.
This example uses just the name, latitude and longitude columns from a CSV
file, converting name to upper case and latitude and longitude to floating
point numbers:
sqlite-utils insert plants.db plants plants.csv --csv --convert '
return {
"name": row["name"].upper(),
"latitude": float(row["latitude"]),
"longitude": float(row["longitude"]),
}'
If you are using --lines your code will be passed a "line" variable, and for
--text a "text" variable.
When using --text your function can return an iterator of rows to insert. This
example inserts one record per word in the input:
echo 'A bunch of words' | sqlite-utils insert words.db words - \
--text --convert '({"word": w} for w in text.split())'
Options:
--pk TEXT Columns to use as the primary key, e.g. id
--flatten Flatten nested JSON objects, so {"a": {"b": 1}}
becomes {"a_b": 1}
--nl Expect newline-delimited JSON
-c, --csv Expect CSV input
--tsv Expect TSV input
--empty-null Treat empty strings as NULL
--lines Treat each line as a single value called 'line'
--text Treat input as a single value called 'text'
--convert TEXT Python code to convert each item
--import TEXT Python modules to import
--delimiter TEXT Delimiter to use for CSV files
--quotechar TEXT Quote character to use for CSV/TSV
--sniff Detect delimiter and quote character
--no-headers CSV file has no header row
--encoding TEXT Character encoding for input, defaults to utf-8
--batch-size INTEGER Commit every X records
--stop-after INTEGER Stop after X records
--alter Alter existing table to add any missing columns
--not-null TEXT Columns that should be created as NOT NULL
--default <TEXT TEXT>... Default value that should be set for a column
-d, --detect-types Detect types for columns in CSV/TSV data
--analyze Run ANALYZE at the end of this operation
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
--silent Do not show progress bar
--strict Apply STRICT mode to created table
--ignore Ignore records if pk already exists
--replace Replace records if pk already exists
--truncate Truncate table before inserting records, if table
already exists
-h, --help Show this message and exit.
.. _cli_ref_upsert:
upsert
======
See :ref:`cli_upsert`.
::
Usage: sqlite-utils upsert [OPTIONS] PATH TABLE FILE
Upsert records based on their primary key. Works like 'insert' but if an
incoming record has a primary key that matches an existing record the existing
record will be updated.
Example:
echo '[
{"id": 1, "name": "Lila"},
{"id": 2, "name": "Suna"}
]' | sqlite-utils upsert data.db chickens - --pk id
Options:
--pk TEXT Columns to use as the primary key, e.g. id
[required]
--flatten Flatten nested JSON objects, so {"a": {"b": 1}}
becomes {"a_b": 1}
--nl Expect newline-delimited JSON
-c, --csv Expect CSV input
--tsv Expect TSV input
--empty-null Treat empty strings as NULL
--lines Treat each line as a single value called 'line'
--text Treat input as a single value called 'text'
--convert TEXT Python code to convert each item
--import TEXT Python modules to import
--delimiter TEXT Delimiter to use for CSV files
--quotechar TEXT Quote character to use for CSV/TSV
--sniff Detect delimiter and quote character
--no-headers CSV file has no header row
--encoding TEXT Character encoding for input, defaults to utf-8
--batch-size INTEGER Commit every X records
--stop-after INTEGER Stop after X records
--alter Alter existing table to add any missing columns
--not-null TEXT Columns that should be created as NOT NULL
--default <TEXT TEXT>... Default value that should be set for a column
-d, --detect-types Detect types for columns in CSV/TSV data
--analyze Run ANALYZE at the end of this operation
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
--silent Do not show progress bar
--strict Apply STRICT mode to created table
-h, --help Show this message and exit.
.. _cli_ref_bulk:
bulk
====
See :ref:`cli_bulk`.
::
Usage: sqlite-utils bulk [OPTIONS] PATH SQL FILE
Execute parameterized SQL against the provided list of documents.
Example:
echo '[
{"id": 1, "name": "Lila2"},
{"id": 2, "name": "Suna2"}
]' | sqlite-utils bulk data.db '
update chickens set name = :name where id = :id
' -
Options:
--batch-size INTEGER Commit every X records
--functions TEXT Python code defining one or more custom SQL functions
--flatten Flatten nested JSON objects, so {"a": {"b": 1}} becomes
{"a_b": 1}
--nl Expect newline-delimited JSON
-c, --csv Expect CSV input
--tsv Expect TSV input
--empty-null Treat empty strings as NULL
--lines Treat each line as a single value called 'line'
--text Treat input as a single value called 'text'
--convert TEXT Python code to convert each item
--import TEXT Python modules to import
--delimiter TEXT Delimiter to use for CSV files
--quotechar TEXT Quote character to use for CSV/TSV
--sniff Detect delimiter and quote character
--no-headers CSV file has no header row
--encoding TEXT Character encoding for input, defaults to utf-8
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_search:
search
======
See :ref:`cli_search`.
::
Usage: sqlite-utils search [OPTIONS] PATH DBTABLE Q
Execute a full-text search against this table
Example:
sqlite-utils search data.db chickens lila
Options:
-o, --order TEXT Order by ('column' or 'column desc')
-c, --column TEXT Columns to return
--limit INTEGER Number of rows to return - defaults to everything
--sql Show SQL query that would be run
--quote Apply FTS quoting rules to search term
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira, latex,
latex_booktabs, latex_longtable, latex_raw, mediawiki,
mixed_grid, mixed_outline, moinmoin, orgtbl, outline,
pipe, plain, presto, pretty, psql, rounded_grid,
rounded_outline, rst, simple, simple_grid,
simple_outline, textile, tsv, unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not escaped
strings
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_transform:
transform
=========
See :ref:`cli_transform_table`.
::
Usage: sqlite-utils transform [OPTIONS] PATH TABLE
Transform a table beyond the capabilities of ALTER TABLE
Example:
sqlite-utils transform mydb.db mytable \
--drop column1 \
--rename column2 column_renamed
Options:
--type <TEXT CHOICE>... Change column type to INTEGER, TEXT, FLOAT or
BLOB
--drop TEXT Drop this column
--rename <TEXT TEXT>... Rename this column to X
-o, --column-order TEXT Reorder columns
--not-null TEXT Set this column to NOT NULL
--not-null-false TEXT Remove NOT NULL from this column
--pk TEXT Make this column the primary key
--pk-none Remove primary key (convert to rowid table)
--default <TEXT TEXT>... Set default value for this column
--default-none TEXT Remove default from this column
--add-foreign-key <TEXT TEXT TEXT>...
Add a foreign key constraint from a column to
another table with another column
--drop-foreign-key TEXT Drop foreign key constraint for this column
--sql Output SQL without executing it
--load-extension TEXT Path to SQLite extension, with optional
:entrypoint
-h, --help Show this message and exit.
.. _cli_ref_extract:
extract
=======
See :ref:`cli_extract`.
::
Usage: sqlite-utils extract [OPTIONS] PATH TABLE COLUMNS...
Extract one or more columns into a separate table
Example:
sqlite-utils extract trees.db Street_Trees species
Options:
--table TEXT Name of the other table to extract columns to
--fk-column TEXT Name of the foreign key column to add to the table
--rename <TEXT TEXT>... Rename this column in extracted table
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_schema:
schema
======
See :ref:`cli_schema`.
::
Usage: sqlite-utils schema [OPTIONS] PATH [TABLES]...
Show full schema for this database or for specified tables
Example:
sqlite-utils schema trees.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_insert_files:
insert-files
============
See :ref:`cli_insert_files`.
::
Usage: sqlite-utils insert-files [OPTIONS] PATH TABLE FILE_OR_DIR...
Insert one or more files using BLOB columns in the specified table
Example:
sqlite-utils insert-files pics.db images *.gif \
-c name:name \
-c content:content \
-c content_hash:sha256 \
-c created:ctime_iso \
-c modified:mtime_iso \
-c size:size \
--pk name
Options:
-c, --column TEXT Column definitions for the table
--pk TEXT Column to use as primary key
--alter Alter table to add missing columns
--replace Replace files with matching primary key
--upsert Upsert files with matching primary key
--name TEXT File name to use
--text Store file content as TEXT, not BLOB
--encoding TEXT Character encoding for input, defaults to utf-8
-s, --silent Don't show a progress bar
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_analyze_tables:
analyze-tables
==============
See :ref:`cli_analyze_tables`.
::
Usage: sqlite-utils analyze-tables [OPTIONS] PATH [TABLES]...
Analyze the columns in one or more tables
Example:
sqlite-utils analyze-tables data.db trees
Options:
-c, --column TEXT Specific columns to analyze
--save Save results to _analyze_tables table
--common-limit INTEGER How many common values
--no-most Skip most common values
--no-least Skip least common values
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_convert:
convert
=======
See :ref:`cli_convert`.
::
Usage: sqlite-utils convert [OPTIONS] DB_PATH TABLE COLUMNS... CODE
Convert columns using Python code you supply. For example:
sqlite-utils convert my.db mytable mycolumn \
'"\n".join(textwrap.wrap(value, 10))' \
--import=textwrap
"value" is a variable with the column value to be converted.
Use "-" for CODE to read Python code from standard input.
The following common operations are available as recipe functions:
r.jsonsplit(value, delimiter=',', type=<class 'str'>)
Convert a string like a,b,c into a JSON array ["a", "b", "c"]
r.parsedate(value, dayfirst=False, yearfirst=False, errors=None)
Parse a date and convert it to ISO date format: yyyy-mm-dd
- dayfirst=True: treat xx as the day in xx/yy/zz
- yearfirst=True: treat xx as the year in xx/yy/zz
- errors=r.IGNORE to ignore values that cannot be parsed
- errors=r.SET_NULL to set values that cannot be parsed to null
r.parsedatetime(value, dayfirst=False, yearfirst=False, errors=None)
Parse a datetime and convert it to ISO datetime format: yyyy-mm-ddTHH:MM:SS
- dayfirst=True: treat xx as the day in xx/yy/zz
- yearfirst=True: treat xx as the year in xx/yy/zz
- errors=r.IGNORE to ignore values that cannot be parsed
- errors=r.SET_NULL to set values that cannot be parsed to null
You can use these recipes like so:
sqlite-utils convert my.db mytable mycolumn \
'r.jsonsplit(value, delimiter=":")'
Options:
--import TEXT Python modules to import
--dry-run Show results of running this against first 10
rows
--multi Populate columns for keys in returned
dictionary
--where TEXT Optional where clause
-p, --param <TEXT TEXT>... Named :parameters for where clause
--output TEXT Optional separate column to populate with the
output
--output-type [integer|float|blob|text]
Column type to use for the output column
--drop Drop original column afterwards
--no-skip-false Don't skip falsey values
-s, --silent Don't show a progress bar
--pdb Open pdb debugger on first error
-h, --help Show this message and exit.
.. _cli_ref_tables:
tables
======
See :ref:`cli_tables`.
::
Usage: sqlite-utils tables [OPTIONS] PATH
List the tables in the database
Example:
sqlite-utils tables trees.db
Options:
--fts4 Just show FTS4 enabled tables
--fts5 Just show FTS5 enabled tables
--counts Include row counts per table
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira, latex,
latex_booktabs, latex_longtable, latex_raw, mediawiki,
mixed_grid, mixed_outline, moinmoin, orgtbl, outline,
pipe, plain, presto, pretty, psql, rounded_grid,
rounded_outline, rst, simple, simple_grid,
simple_outline, textile, tsv, unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not escaped
strings
--columns Include list of columns for each table
--schema Include schema for each table
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_views:
views
=====
See :ref:`cli_views`.
::
Usage: sqlite-utils views [OPTIONS] PATH
List the views in the database
Example:
sqlite-utils views trees.db
Options:
--counts Include row counts per view
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira, latex,
latex_booktabs, latex_longtable, latex_raw, mediawiki,
mixed_grid, mixed_outline, moinmoin, orgtbl, outline,
pipe, plain, presto, pretty, psql, rounded_grid,
rounded_outline, rst, simple, simple_grid,
simple_outline, textile, tsv, unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not escaped
strings
--columns Include list of columns for each view
--schema Include schema for each view
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_rows:
rows
====
See :ref:`cli_rows`.
::
Usage: sqlite-utils rows [OPTIONS] PATH DBTABLE
Output all rows in the specified table
Example:
sqlite-utils rows trees.db Trees
Options:
-c, --column TEXT Columns to return
--where TEXT Optional where clause
-o, --order TEXT Order by ('column' or 'column desc')
-p, --param <TEXT TEXT>... Named :parameters for where clause
--limit INTEGER Number of rows to return - defaults to everything
--offset INTEGER SQL offset to use
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira,
latex, latex_booktabs, latex_longtable, latex_raw,
mediawiki, mixed_grid, mixed_outline, moinmoin,
orgtbl, outline, pipe, plain, presto, pretty,
psql, rounded_grid, rounded_outline, rst, simple,
simple_grid, simple_outline, textile, tsv,
unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not
escaped strings
--load-extension TEXT Path to SQLite extension, with optional
:entrypoint
-h, --help Show this message and exit.
.. _cli_ref_triggers:
triggers
========
See :ref:`cli_triggers`.
::
Usage: sqlite-utils triggers [OPTIONS] PATH [TABLES]...
Show triggers configured in this database
Example:
sqlite-utils triggers trees.db
Options:
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira, latex,
latex_booktabs, latex_longtable, latex_raw, mediawiki,
mixed_grid, mixed_outline, moinmoin, orgtbl, outline,
pipe, plain, presto, pretty, psql, rounded_grid,
rounded_outline, rst, simple, simple_grid,
simple_outline, textile, tsv, unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not escaped
strings
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_indexes:
indexes
=======
See :ref:`cli_indexes`.
::
Usage: sqlite-utils indexes [OPTIONS] PATH [TABLES]...
Show indexes for the whole database or specific tables
Example:
sqlite-utils indexes trees.db Trees
Options:
--aux Include auxiliary columns
--nl Output newline-delimited JSON
--arrays Output rows as arrays instead of objects
--csv Output CSV
--tsv Output TSV
--no-headers Omit CSV headers
-t, --table Output as a formatted table
--fmt TEXT Table format - one of asciidoc, double_grid,
double_outline, fancy_grid, fancy_outline, github,
grid, heavy_grid, heavy_outline, html, jira, latex,
latex_booktabs, latex_longtable, latex_raw, mediawiki,
mixed_grid, mixed_outline, moinmoin, orgtbl, outline,
pipe, plain, presto, pretty, psql, rounded_grid,
rounded_outline, rst, simple, simple_grid,
simple_outline, textile, tsv, unsafehtml, youtrack
--json-cols Detect JSON cols and output them as JSON, not escaped
strings
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_create_database:
create-database
===============
See :ref:`cli_create_database`.
::
Usage: sqlite-utils create-database [OPTIONS] PATH
Create a new empty database file
Example:
sqlite-utils create-database trees.db
Options:
--enable-wal Enable WAL mode on the created database
--init-spatialite Enable SpatiaLite on the created database
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_create_table:
create-table
============
See :ref:`cli_create_table`.
::
Usage: sqlite-utils create-table [OPTIONS] PATH TABLE COLUMNS...
Add a table with the specified columns. Columns should be specified using
name, type pairs, for example:
sqlite-utils create-table my.db people \
id integer \
name text \
height float \
photo blob --pk id
Valid column types are text, integer, float and blob.
Options:
--pk TEXT Column to use as primary key
--not-null TEXT Columns that should be created as NOT NULL
--default ... Default value that should be set for a column
--fk ... Column, other table, other column to set as a
foreign key
--ignore If table already exists, do nothing
--replace If table already exists, replace it
--transform If table already exists, try to transform the schema
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
--strict Apply STRICT mode to created table
-h, --help Show this message and exit.
.. _cli_ref_create_index:
create-index
============
See :ref:`cli_create_index`.
::
Usage: sqlite-utils create-index [OPTIONS] PATH TABLE COLUMN...
Add an index to the specified table for the specified columns
Example:
sqlite-utils create-index chickens.db chickens name
To create an index in descending order:
sqlite-utils create-index chickens.db chickens -- -name
Options:
--name TEXT Explicit name for the new index
--unique Make this a unique index
--if-not-exists, --ignore Ignore if index already exists
--analyze Run ANALYZE after creating the index
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_enable_fts:
enable-fts
==========
See :ref:`cli_fts`.
::
Usage: sqlite-utils enable-fts [OPTIONS] PATH TABLE COLUMN...
Enable full-text search for specific table and columns
Example:
sqlite-utils enable-fts chickens.db chickens name
Options:
--fts4 Use FTS4
--fts5 Use FTS5
--tokenize TEXT Tokenizer to use, e.g. porter
--create-triggers Create triggers to update the FTS tables when the
parent table changes.
--replace Replace existing FTS configuration if it exists
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_populate_fts:
populate-fts
============
::
Usage: sqlite-utils populate-fts [OPTIONS] PATH TABLE COLUMN...
Re-populate full-text search for specific table and columns
Example:
sqlite-utils populate-fts chickens.db chickens name
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_rebuild_fts:
rebuild-fts
===========
::
Usage: sqlite-utils rebuild-fts [OPTIONS] PATH [TABLES]...
Rebuild all or specific full-text search tables
Example:
sqlite-utils rebuild-fts chickens.db chickens
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_disable_fts:
disable-fts
===========
::
Usage: sqlite-utils disable-fts [OPTIONS] PATH TABLE
Disable full-text search for specific table
Example:
sqlite-utils disable-fts chickens.db chickens
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_optimize:
optimize
========
See :ref:`cli_optimize`.
::
Usage: sqlite-utils optimize [OPTIONS] PATH [TABLES]...
Optimize all full-text search tables and then run VACUUM - should shrink the
database file
Example:
sqlite-utils optimize chickens.db
Options:
--no-vacuum Don't run VACUUM
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_analyze:
analyze
=======
See :ref:`cli_analyze`.
::
Usage: sqlite-utils analyze [OPTIONS] PATH [NAMES]...
Run ANALYZE against the whole database, or against specific named indexes and
tables
Example:
sqlite-utils analyze chickens.db
Options:
-h, --help Show this message and exit.
.. _cli_ref_vacuum:
vacuum
======
See :ref:`cli_vacuum`.
::
Usage: sqlite-utils vacuum [OPTIONS] PATH
Run VACUUM against the database
Example:
sqlite-utils vacuum chickens.db
Options:
-h, --help Show this message and exit.
.. _cli_ref_dump:
dump
====
See :ref:`cli_dump`.
::
Usage: sqlite-utils dump [OPTIONS] PATH
Output a SQL dump of the schema and full contents of the database
Example:
sqlite-utils dump chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_add_column:
add-column
==========
See :ref:`cli_add_column`.
::
Usage: sqlite-utils add-column [OPTIONS] PATH TABLE COL_NAME
[[integer|int|float|text|str|blob|bytes]]
Add a column to the specified table
Example:
sqlite-utils add-column chickens.db chickens weight float
Options:
--fk TEXT Table to reference as a foreign key
--fk-col TEXT Referenced column on that foreign key table - if
omitted will automatically use the primary key
--not-null-default TEXT Add NOT NULL DEFAULT 'TEXT' constraint
--ignore If column already exists, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_add_foreign_key:
add-foreign-key
===============
See :ref:`cli_add_foreign_key`.
::
Usage: sqlite-utils add-foreign-key [OPTIONS] PATH TABLE COLUMN [OTHER_TABLE]
[OTHER_COLUMN]
Add a new foreign key constraint to an existing table
Example:
sqlite-utils add-foreign-key my.db books author_id authors id
Options:
--ignore If foreign key already exists, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_add_foreign_keys:
add-foreign-keys
================
See :ref:`cli_add_foreign_keys`.
::
Usage: sqlite-utils add-foreign-keys [OPTIONS] PATH [FOREIGN_KEY]...
Add multiple new foreign key constraints to a database
Example:
sqlite-utils add-foreign-keys my.db \
books author_id authors id \
authors country_id countries id
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_index_foreign_keys:
index-foreign-keys
==================
See :ref:`cli_index_foreign_keys`.
::
Usage: sqlite-utils index-foreign-keys [OPTIONS] PATH
Ensure every foreign key column has an index on it
Example:
sqlite-utils index-foreign-keys chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_enable_wal:
enable-wal
==========
See :ref:`cli_wal`.
::
Usage: sqlite-utils enable-wal [OPTIONS] PATH...
Enable WAL for database files
Example:
sqlite-utils enable-wal chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_disable_wal:
disable-wal
===========
::
Usage: sqlite-utils disable-wal [OPTIONS] PATH...
Disable WAL for database files
Example:
sqlite-utils disable-wal chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_enable_counts:
enable-counts
=============
See :ref:`cli_enable_counts`.
::
Usage: sqlite-utils enable-counts [OPTIONS] PATH [TABLES]...
Configure triggers to update a _counts table with row counts
Example:
sqlite-utils enable-counts chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_reset_counts:
reset-counts
============
::
Usage: sqlite-utils reset-counts [OPTIONS] PATH
Reset calculated counts in the _counts table
Example:
sqlite-utils reset-counts chickens.db
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_duplicate:
duplicate
=========
See :ref:`cli_duplicate_table`.
::
Usage: sqlite-utils duplicate [OPTIONS] PATH TABLE NEW_TABLE
Create a duplicate of this table, copying across the schema and all row data.
Options:
--ignore If table does not exist, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_rename_table:
rename-table
============
See :ref:`cli_renaming_tables`.
::
Usage: sqlite-utils rename-table [OPTIONS] PATH TABLE NEW_NAME
Rename this table.
Options:
--ignore If table does not exist, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_drop_table:
drop-table
==========
See :ref:`cli_drop_table`.
::
Usage: sqlite-utils drop-table [OPTIONS] PATH TABLE
Drop the specified table
Example:
sqlite-utils drop-table chickens.db chickens
Options:
--ignore If table does not exist, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_create_view:
create-view
===========
See :ref:`cli_create_view`.
::
Usage: sqlite-utils create-view [OPTIONS] PATH VIEW SELECT
Create a view for the provided SELECT query
Example:
sqlite-utils create-view chickens.db heavy_chickens \
'select * from chickens where weight > 3'
Options:
--ignore If view already exists, do nothing
--replace If view already exists, replace it
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_drop_view:
drop-view
=========
See :ref:`cli_drop_view`.
::
Usage: sqlite-utils drop-view [OPTIONS] PATH VIEW
Drop the specified view
Example:
sqlite-utils drop-view chickens.db heavy_chickens
Options:
--ignore If view does not exist, do nothing
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_install:
install
=======
See :ref:`cli_install`.
::
Usage: sqlite-utils install [OPTIONS] [PACKAGES]...
Install packages from PyPI into the same environment as sqlite-utils
Options:
-U, --upgrade Upgrade packages to latest version
-e, --editable TEXT Install a project in editable mode from this path
-h, --help Show this message and exit.
.. _cli_ref_uninstall:
uninstall
=========
See :ref:`cli_uninstall`.
::
Usage: sqlite-utils uninstall [OPTIONS] PACKAGES...
Uninstall Python packages from the sqlite-utils environment
Options:
-y, --yes Don't ask for confirmation
-h, --help Show this message and exit.
.. _cli_ref_add_geometry_column:
add-geometry-column
===================
See :ref:`cli_spatialite`.
::
Usage: sqlite-utils add-geometry-column [OPTIONS] DB_PATH TABLE COLUMN_NAME
Add a SpatiaLite geometry column to an existing table. Requires SpatiaLite
extension.
By default, this command will try to load the SpatiaLite extension from usual
paths. To load it from a specific path, use --load-extension.
Options:
-t, --type [POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION|GEOMETRY]
Specify a geometry type for this column.
[default: GEOMETRY]
--srid INTEGER Spatial Reference ID. See
https://spatialreference.org for details on
specific projections. [default: 4326]
--dimensions TEXT Coordinate dimensions. Use XYZ for three-
dimensional geometries.
--not-null Add a NOT NULL constraint.
--load-extension TEXT Path to SQLite extension, with optional
:entrypoint
-h, --help Show this message and exit.
.. _cli_ref_create_spatial_index:
create-spatial-index
====================
See :ref:`cli_spatialite_indexes`.
::
Usage: sqlite-utils create-spatial-index [OPTIONS] DB_PATH TABLE COLUMN_NAME
Create a spatial index on a SpatiaLite geometry column. The table and geometry
column must already exist before trying to add a spatial index.
By default, this command will try to load the SpatiaLite extension from usual
paths. To load it from a specific path, use --load-extension.
Options:
--load-extension TEXT Path to SQLite extension, with optional :entrypoint
-h, --help Show this message and exit.
.. _cli_ref_plugins:
plugins
=======
::
Usage: sqlite-utils plugins [OPTIONS]
List installed plugins
Options:
-h, --help Show this message and exit.
.. [[[end]]]
sqlite-utils-4.0a0/docs/cli.rst 0000664 0000000 0000000 00000253374 15007276064 0016513 0 ustar 00root root 0000000 0000000 .. _cli:
================================
sqlite-utils command-line tool
================================
The ``sqlite-utils`` command-line tool can be used to manipulate SQLite databases in a number of different ways.
Once :ref:`installed <installation>` the tool should be available as ``sqlite-utils``. It can also be run using ``python -m sqlite_utils``.
.. contents:: :local:
:class: this-will-duplicate-information-and-it-is-still-useful-here
.. _cli_query:
Running SQL queries
===================
The ``sqlite-utils query`` command lets you run queries directly against a SQLite database file. This is the default subcommand, so the following two examples work the same way:
.. code-block:: bash
sqlite-utils query dogs.db "select * from dogs"
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs"
.. note::
In Python: :ref:`db.query() <python_api_query>` CLI reference: :ref:`sqlite-utils query <cli_ref_query>`
.. _cli_query_json:
Returning JSON
--------------
The default format returned for queries is JSON:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs"
.. code-block:: output
[{"id": 1, "age": 4, "name": "Cleo"},
{"id": 2, "age": 2, "name": "Pancakes"}]
.. _cli_query_nl:
Newline-delimited JSON
~~~~~~~~~~~~~~~~~~~~~~
Use ``--nl`` to get back newline-delimited JSON objects:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --nl
.. code-block:: output
{"id": 1, "age": 4, "name": "Cleo"}
{"id": 2, "age": 2, "name": "Pancakes"}
.. _cli_query_arrays:
JSON arrays
~~~~~~~~~~~
You can use ``--arrays`` to request arrays instead of objects:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --arrays
.. code-block:: output
[[1, 4, "Cleo"],
[2, 2, "Pancakes"]]
You can also combine ``--arrays`` and ``--nl``:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --arrays --nl
.. code-block:: output
[1, 4, "Cleo"]
[2, 2, "Pancakes"]
If you want to pretty-print the output further, you can pipe it through ``python -mjson.tool``:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" | python -mjson.tool
.. code-block:: output
[
{
"id": 1,
"age": 4,
"name": "Cleo"
},
{
"id": 2,
"age": 2,
"name": "Pancakes"
}
]
.. _cli_query_binary_json:
Binary data in JSON
~~~~~~~~~~~~~~~~~~~
Binary strings are not valid JSON, so BLOB columns containing binary data will be returned as a JSON object containing base64-encoded data that looks like this:
.. code-block:: bash
sqlite-utils dogs.db "select name, content from images" | python -mjson.tool
.. code-block:: output
[
{
"name": "transparent.gif",
"content": {
"$base64": true,
"encoded": "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
}
}
]
.. _cli_json_values:
Nested JSON values
~~~~~~~~~~~~~~~~~~
If one of your columns contains JSON, by default it will be returned as an escaped string:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" | python -mjson.tool
.. code-block:: output
[
{
"id": 1,
"name": "Cleo",
"friends": "[{\"name\": \"Pancakes\"}, {\"name\": \"Bailey\"}]"
}
]
You can use the ``--json-cols`` option to automatically detect these JSON columns and output them as nested JSON data:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --json-cols | python -mjson.tool
.. code-block:: output
[
{
"id": 1,
"name": "Cleo",
"friends": [
{
"name": "Pancakes"
},
{
"name": "Bailey"
}
]
}
]
.. _cli_query_csv:
Returning CSV or TSV
--------------------
You can use the ``--csv`` option to return results as CSV:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --csv
.. code-block:: output
id,age,name
1,4,Cleo
2,2,Pancakes
This will default to including the column names as a header row. To exclude the headers, use ``--no-headers``:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --csv --no-headers
.. code-block:: output
1,4,Cleo
2,2,Pancakes
Use ``--tsv`` instead of ``--csv`` to get back tab-separated values:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --tsv
.. code-block:: output
id age name
1 4 Cleo
2 2 Pancakes
.. _cli_query_table:
Table-formatted output
----------------------
You can use the ``--table`` option (or ``-t`` shortcut) to output query results as a table:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --table
.. code-block:: output
id age name
---- ----- --------
1 4 Cleo
2 2 Pancakes
You can use the ``--fmt`` option to specify different table formats, for example ``rst`` for reStructuredText:
.. code-block:: bash
sqlite-utils dogs.db "select * from dogs" --fmt rst
.. code-block:: output
==== ===== ========
id age name
==== ===== ========
1 4 Cleo
2 2 Pancakes
==== ===== ========
Available ``--fmt`` options are:
.. [[[cog
import tabulate
cog.out("\n" + "\n".join('- ``{}``'.format(t) for t in tabulate.tabulate_formats) + "\n\n")
.. ]]]
- ``asciidoc``
- ``double_grid``
- ``double_outline``
- ``fancy_grid``
- ``fancy_outline``
- ``github``
- ``grid``
- ``heavy_grid``
- ``heavy_outline``
- ``html``
- ``jira``
- ``latex``
- ``latex_booktabs``
- ``latex_longtable``
- ``latex_raw``
- ``mediawiki``
- ``mixed_grid``
- ``mixed_outline``
- ``moinmoin``
- ``orgtbl``
- ``outline``
- ``pipe``
- ``plain``
- ``presto``
- ``pretty``
- ``psql``
- ``rounded_grid``
- ``rounded_outline``
- ``rst``
- ``simple``
- ``simple_grid``
- ``simple_outline``
- ``textile``
- ``tsv``
- ``unsafehtml``
- ``youtrack``
.. [[[end]]]
This list can also be found by running ``sqlite-utils query --help``.
.. _cli_query_raw:
Returning raw data, such as binary content
------------------------------------------
If your table contains binary data in a ``BLOB`` you can use the ``--raw`` option to output specific columns directly to standard out.
For example, to retrieve a binary image from a ``BLOB`` column and store it in a file you can use the following:
.. code-block:: bash
sqlite-utils photos.db "select contents from photos where id=1" --raw > myphoto.jpg
To return the first column of each result as raw data, separated by newlines, use ``--raw-lines``:
.. code-block:: bash
sqlite-utils photos.db "select caption from photos" --raw-lines > captions.txt
.. _cli_query_parameters:
Using named parameters
----------------------
You can pass named parameters to the query using ``-p``:
.. code-block:: bash
sqlite-utils query dogs.db "select :num * :num2" -p num 5 -p num2 6
.. code-block:: output
[{":num * :num2": 30}]
These will be correctly quoted and escaped in the SQL query, providing a safe way to combine other values with SQL.
.. _cli_query_update_insert_delete:
UPDATE, INSERT and DELETE
-------------------------
If you execute an ``UPDATE``, ``INSERT`` or ``DELETE`` query the command will return the number of affected rows:
.. code-block:: bash
sqlite-utils dogs.db "update dogs set age = 5 where name = 'Cleo'"
.. code-block:: output
[{"rows_affected": 1}]
.. _cli_query_functions:
Defining custom SQL functions
-----------------------------
You can use the ``--functions`` option to pass a block of Python code that defines additional functions which can then be called by your SQL query.
This example defines a function which extracts the domain from a URL:
.. code-block:: bash
sqlite-utils query sites.db "select url, domain(url) from urls" --functions '
from urllib.parse import urlparse
def domain(url):
return urlparse(url).netloc
'
Every callable object defined in the block will be registered as a SQL function with the same name, with the exception of functions with names that begin with an underscore.
.. _cli_query_extensions:
SQLite extensions
-----------------
You can load SQLite extension modules using the ``--load-extension`` option, see :ref:`cli_load_extension`.
.. code-block:: bash
sqlite-utils dogs.db "select spatialite_version()" --load-extension=spatialite
.. code-block:: output
[{"spatialite_version()": "4.3.0a"}]
.. _cli_query_attach:
Attaching additional databases
------------------------------
SQLite supports cross-database SQL queries, which can join data from tables in more than one database file.
You can attach one or more additional databases using the ``--attach`` option, providing an alias to use for that database and the path to the SQLite file on disk.
This example attaches the ``books.db`` database under the alias ``books`` and then runs a query that combines data from that database with the default ``dogs.db`` database:
.. code-block:: bash
sqlite-utils dogs.db --attach books books.db \
'select * from sqlite_master union all select * from books.sqlite_master'
.. note::
In Python: :ref:`db.attach() <python_api_attach>`
.. _cli_memory:
Querying data directly using an in-memory database
==================================================
The ``sqlite-utils memory`` command works similarly to ``sqlite-utils query``, but allows you to execute queries against an in-memory database.
You can also pass this command CSV or JSON files which will be loaded into a temporary in-memory table, allowing you to execute SQL against that data without a separate step to first convert it to SQLite.
Without any extra arguments, this command executes SQL against the in-memory database directly:
.. code-block:: bash
sqlite-utils memory 'select sqlite_version()'
.. code-block:: output
[{"sqlite_version()": "3.35.5"}]
It takes all of the same output formatting options as :ref:`sqlite-utils query <cli_query>`: ``--csv`` and ``--tsv`` and ``--table`` and ``--nl``:
.. code-block:: bash
sqlite-utils memory 'select sqlite_version()' --csv
.. code-block:: output
sqlite_version()
3.35.5
.. code-block:: bash
sqlite-utils memory 'select sqlite_version()' --fmt grid
.. code-block:: output
+--------------------+
| sqlite_version() |
+====================+
| 3.35.5 |
+--------------------+
.. _cli_memory_csv_json:
Running queries directly against CSV or JSON
--------------------------------------------
If you have data in CSV or JSON format you can load it into an in-memory SQLite database and run queries against it directly in a single command using ``sqlite-utils memory`` like this:
.. code-block:: bash
sqlite-utils memory data.csv "select * from data"
You can pass multiple files to the command if you want to run joins between data from different files:
.. code-block:: bash
sqlite-utils memory one.csv two.json \
"select * from one join two on one.id = two.other_id"
If your data is JSON it should be the same format supported by the :ref:`sqlite-utils insert command <cli_inserting_data>` - so either a single JSON object (treated as a single row) or a list of JSON objects.
CSV data can be comma- or tab-delimited.
The in-memory tables will be named after the files without their extensions. The tool also sets up aliases for those tables (using SQL views) as ``t1``, ``t2`` and so on, or you can use the alias ``t`` to refer to the first table:
.. code-block:: bash
sqlite-utils memory example.csv "select * from t"
If two files have the same name they will be assigned a numeric suffix:
.. code-block:: bash
sqlite-utils memory foo/data.csv bar/data.csv "select * from data_2"
To read from standard input, use either ``-`` or ``stdin`` as the filename - then use ``stdin`` or ``t`` or ``t1`` as the table name:
.. code-block:: bash
cat example.csv | sqlite-utils memory - "select * from stdin"
Incoming CSV data will be assumed to use ``utf-8``. If your data uses a different character encoding you can specify that with ``--encoding``:
.. code-block:: bash
cat example.csv | sqlite-utils memory - "select * from stdin" --encoding=latin-1
If you are joining across multiple CSV files they must all use the same encoding.
Column types will be automatically detected in CSV or TSV data, using the same mechanism as ``--detect-types`` described in :ref:`cli_insert_csv_tsv`. You can pass the ``--no-detect-types`` option to disable this automatic type detection and treat all CSV and TSV columns as ``TEXT``.
.. _cli_memory_explicit:
Explicitly specifying the format
--------------------------------
By default, ``sqlite-utils memory`` will attempt to detect the incoming data format (JSON, TSV or CSV) automatically.
You can instead specify an explicit format by adding a ``:csv``, ``:tsv``, ``:json`` or ``:nl`` (for newline-delimited JSON) suffix to the filename. For example:
.. code-block:: bash
sqlite-utils memory one.dat:csv two.dat:nl \
"select * from one union select * from two"
Here the contents of ``one.dat`` will be treated as CSV and the contents of ``two.dat`` will be treated as newline-delimited JSON.
To explicitly specify the format for data piped into the tool on standard input, use ``stdin:format`` - for example:
.. code-block:: bash
cat one.dat | sqlite-utils memory stdin:csv "select * from stdin"
.. _cli_memory_attach:
Joining in-memory data against existing databases using \-\-attach
------------------------------------------------------------------
The :ref:`attach option <cli_query_attach>` can be used to attach database files to the in-memory connection, enabling joins between in-memory data loaded from a file and tables in existing SQLite database files. An example:
.. code-block:: bash
echo "id\n1\n3\n5" | sqlite-utils memory - --attach trees trees.db \
"select * from trees.trees where rowid in (select id from stdin)"
Here the ``--attach trees trees.db`` option makes the ``trees.db`` database available with an alias of ``trees``.
``select * from trees.trees where ...`` can then query the ``trees`` table in that database.
The CSV data that was piped into the script is available in the ``stdin`` table, so ``... where rowid in (select id from stdin)`` can be used to return rows from the ``trees`` table that match IDs that were piped in as CSV content.
.. _cli_memory_schema_dump_save:
\-\-schema, \-\-analyze, \-\-dump and \-\-save
----------------------------------------------
To see the in-memory database schema that would be used for a file or for multiple files, use ``--schema``:
.. code-block:: bash
sqlite-utils memory dogs.csv --schema
.. code-block:: output
CREATE TABLE [dogs] (
[id] INTEGER,
[age] INTEGER,
[name] TEXT
);
CREATE VIEW t1 AS select * from [dogs];
CREATE VIEW t AS select * from [dogs];
You can run the equivalent of the :ref:`analyze-tables <cli_analyze_tables>` command using ``--analyze``:
.. code-block:: bash
sqlite-utils memory dogs.csv --analyze
.. code-block:: output
dogs.id: (1/3)
Total rows: 2
Null rows: 0
Blank rows: 0
Distinct values: 2
dogs.name: (2/3)
Total rows: 2
Null rows: 0
Blank rows: 0
Distinct values: 2
dogs.age: (3/3)
Total rows: 2
Null rows: 0
Blank rows: 0
Distinct values: 2
You can output SQL that will both create the tables and insert the full data used to populate the in-memory database using ``--dump``:
.. code-block:: bash
sqlite-utils memory dogs.csv --dump
.. code-block:: output
BEGIN TRANSACTION;
CREATE TABLE [dogs] (
[id] INTEGER,
[age] INTEGER,
[name] TEXT
);
INSERT INTO "dogs" VALUES('1','4','Cleo');
INSERT INTO "dogs" VALUES('2','2','Pancakes');
CREATE VIEW t1 AS select * from [dogs];
CREATE VIEW t AS select * from [dogs];
COMMIT;
Passing ``--save dogs.db`` will instead use that SQL to populate a new database file:
.. code-block:: bash
sqlite-utils memory dogs.csv --save dogs.db
These features are mainly intended as debugging tools - for much more finely grained control over how data is inserted into a SQLite database file see :ref:`cli_inserting_data` and :ref:`cli_insert_csv_tsv`.
.. _cli_rows:
Returning all rows in a table
=============================
You can return every row in a specified table using the ``rows`` command:
.. code-block:: bash
sqlite-utils rows dogs.db dogs
.. code-block:: output
[{"id": 1, "age": 4, "name": "Cleo"},
{"id": 2, "age": 2, "name": "Pancakes"}]
This command accepts the same output options as ``query`` - so you can pass ``--nl``, ``--csv``, ``--tsv``, ``--no-headers``, ``--table`` and ``--fmt``.
You can use the ``-c`` option to specify a subset of columns to return:
.. code-block:: bash
sqlite-utils rows dogs.db dogs -c age -c name
.. code-block:: output
[{"age": 4, "name": "Cleo"},
{"age": 2, "name": "Pancakes"}]
You can filter rows using a where clause with the ``--where`` option:
.. code-block:: bash
sqlite-utils rows dogs.db dogs -c name --where 'name = "Cleo"'
.. code-block:: output
[{"name": "Cleo"}]
Or pass named parameters using ``--where`` in combination with ``-p``:
.. code-block:: bash
sqlite-utils rows dogs.db dogs -c name --where 'name = :name' -p name Cleo
.. code-block:: output
[{"name": "Cleo"}]
You can define a sort order using ``--order column`` or ``--order 'column desc'``.
Use ``--limit N`` to only return the first ``N`` rows. Use ``--offset N`` to return rows starting from the specified offset.
.. note::
In Python: :ref:`table.rows <python_api_rows>` CLI reference: :ref:`sqlite-utils rows <cli_ref_rows>`
.. _cli_tables:
Listing tables
==============
You can list the names of tables in a database using the ``tables`` command:
.. code-block:: bash
sqlite-utils tables mydb.db
.. code-block:: output
[{"table": "dogs"},
{"table": "cats"},
{"table": "chickens"}]
You can output this list in CSV using the ``--csv`` or ``--tsv`` options:
.. code-block:: bash
sqlite-utils tables mydb.db --csv --no-headers
.. code-block:: output
dogs
cats
chickens
If you just want to see the FTS4 tables, you can use ``--fts4`` (or ``--fts5`` for FTS5 tables):
.. code-block:: bash
sqlite-utils tables docs.db --fts4
.. code-block:: output
[{"table": "docs_fts"}]
Use ``--counts`` to include a count of the number of rows in each table:
.. code-block:: bash
sqlite-utils tables mydb.db --counts
.. code-block:: output
[{"table": "dogs", "count": 12},
{"table": "cats", "count": 332},
{"table": "chickens", "count": 9}]
Use ``--columns`` to include a list of columns in each table:
.. code-block:: bash
sqlite-utils tables dogs.db --counts --columns
.. code-block:: output
[{"table": "Gosh", "count": 0, "columns": ["c1", "c2", "c3"]},
{"table": "Gosh2", "count": 0, "columns": ["c1", "c2", "c3"]},
{"table": "dogs", "count": 2, "columns": ["id", "age", "name"]}]
Use ``--schema`` to include the schema of each table:
.. code-block:: bash
sqlite-utils tables dogs.db --schema --table
.. code-block:: output
table schema
------- -----------------------------------------------
Gosh CREATE TABLE Gosh (c1 text, c2 text, c3 text)
Gosh2 CREATE TABLE Gosh2 (c1 text, c2 text, c3 text)
dogs CREATE TABLE [dogs] (
[id] INTEGER,
[age] INTEGER,
[name] TEXT)
The ``--nl``, ``--csv``, ``--tsv``, ``--table`` and ``--fmt`` options are also available.
.. note::
In Python: :ref:`db.tables or db.table_names() <python_api_tables>` CLI reference: :ref:`sqlite-utils tables <cli_ref_tables>`
.. _cli_views:
Listing views
=============
The ``views`` command shows any views defined in the database:
.. code-block:: bash
sqlite-utils views sf-trees.db --table --counts --columns --schema
.. code-block:: output
view count columns schema
--------- ------- -------------------- --------------------------------------------------------------
demo_view 189144 ['qSpecies'] CREATE VIEW demo_view AS select qSpecies from Street_Tree_List
hello 1 ['sqlite_version()'] CREATE VIEW hello as select sqlite_version()
It takes the same options as the ``tables`` command:
* ``--columns``
* ``--schema``
* ``--counts``
* ``--nl``
* ``--csv``
* ``--tsv``
* ``--table``
.. note::
In Python: :ref:`db.views or db.view_names() <python_api_views>` CLI reference: :ref:`sqlite-utils views <cli_ref_views>`
.. _cli_indexes:
Listing indexes
===============
The ``indexes`` command lists any indexes configured for the database:
.. code-block:: bash
sqlite-utils indexes covid.db --table
.. code-block:: output
table index_name seqno cid name desc coll key
-------------------------------- ------------------------------------------------------ ------- ----- ----------------- ------ ------ -----
johns_hopkins_csse_daily_reports idx_johns_hopkins_csse_daily_reports_combined_key 0 12 combined_key 0 BINARY 1
johns_hopkins_csse_daily_reports idx_johns_hopkins_csse_daily_reports_country_or_region 0 1 country_or_region 0 BINARY 1
johns_hopkins_csse_daily_reports idx_johns_hopkins_csse_daily_reports_province_or_state 0 2 province_or_state 0 BINARY 1
johns_hopkins_csse_daily_reports idx_johns_hopkins_csse_daily_reports_day 0 0 day 0 BINARY 1
ny_times_us_counties idx_ny_times_us_counties_date 0 0 date 1 BINARY 1
ny_times_us_counties idx_ny_times_us_counties_fips 0 3 fips 0 BINARY 1
ny_times_us_counties idx_ny_times_us_counties_county 0 1 county 0 BINARY 1
ny_times_us_counties idx_ny_times_us_counties_state 0 2 state 0 BINARY 1
It shows indexes across all tables. To see indexes for specific tables, list those after the database:
.. code-block:: bash
sqlite-utils indexes covid.db johns_hopkins_csse_daily_reports --table
The command defaults to only showing the columns that are explicitly part of the index. To also include auxiliary columns use the ``--aux`` option - these columns will be listed with a ``key`` of ``0``.
The command takes the same format options as the ``tables`` and ``views`` commands.
.. note::
In Python: :ref:`table.indexes ` CLI reference: :ref:`sqlite-utils indexes `
.. _cli_triggers:
Listing triggers
================
The ``triggers`` command shows any triggers configured for the database:
.. code-block:: bash
sqlite-utils triggers global-power-plants.db --table
.. code-block:: output
name table sql
--------------- --------- -----------------------------------------------------------------
plants_insert plants CREATE TRIGGER [plants_insert] AFTER INSERT ON [plants]
BEGIN
INSERT OR REPLACE INTO [_counts]
VALUES (
'plants',
COALESCE(
(SELECT count FROM [_counts] WHERE [table] = 'plants'),
0
) + 1
);
END
It defaults to showing triggers for all tables. To see triggers for one or more specific tables pass their names as arguments:
.. code-block:: bash
sqlite-utils triggers global-power-plants.db plants
The command takes the same format options as the ``tables`` and ``views`` commands.
.. note::
In Python: :ref:`table.triggers or db.triggers ` CLI reference: :ref:`sqlite-utils triggers `
.. _cli_schema:
Showing the schema
==================
The ``sqlite-utils schema`` command shows the full SQL schema for the database:
.. code-block:: bash
sqlite-utils schema dogs.db
.. code-block:: output
CREATE TABLE "dogs" (
[id] INTEGER PRIMARY KEY,
[name] TEXT
);
This will show the schema for every table and index in the database. To view the schema just for a specified subset of tables pass those as additional arguments:
.. code-block:: bash
sqlite-utils schema dogs.db dogs chickens
.. note::
In Python: :ref:`table.schema ` or :ref:`db.schema ` CLI reference: :ref:`sqlite-utils schema `
.. _cli_analyze_tables:
Analyzing tables
================
When working with a new database it can be useful to get an idea of the shape of the data. The ``sqlite-utils analyze-tables`` command inspects specified tables (or all tables) and calculates some useful details about each of the columns in those tables.
To inspect the ``tags`` table in the ``github.db`` database, run the following:
.. code-block:: bash
sqlite-utils analyze-tables github.db tags
.. code-block:: output
tags.repo: (1/3)
Total rows: 261
Null rows: 0
Blank rows: 0
Distinct values: 14
Most common:
88: 107914493
75: 140912432
27: 206156866
Least common:
1: 209590345
2: 206649770
2: 303218369
tags.name: (2/3)
Total rows: 261
Null rows: 0
Blank rows: 0
Distinct values: 175
Most common:
10: 0.2
9: 0.1
7: 0.3
Least common:
1: 0.1.1
1: 0.11.1
1: 0.1a2
tags.sha: (3/3)
Total rows: 261
Null rows: 0
Blank rows: 0
Distinct values: 261
For each column this tool displays the number of null rows, the number of blank rows (rows that contain an empty string), the number of distinct values and, for columns that are not entirely distinct, the most common and least common values.
If you do not specify any tables every table in the database will be analyzed:
.. code-block:: bash
sqlite-utils analyze-tables github.db
If you wish to analyze one or more specific columns, use the ``-c`` option:
.. code-block:: bash
sqlite-utils analyze-tables github.db tags -c sha
To show more than 10 common values, use ``--common-limit 20``. To skip the most common or least common value analysis, use ``--no-most`` or ``--no-least``:
.. code-block:: bash
sqlite-utils analyze-tables github.db tags --common-limit 20 --no-least
.. _cli_analyze_tables_save:
Saving the analyzed table details
---------------------------------
``analyze-tables`` can take quite a while to run for large database files. You can save the results of the analysis to a database table called ``_analyze_tables_`` using the ``--save`` option:
.. code-block:: bash
sqlite-utils analyze-tables github.db --save
The ``_analyze_tables_`` table has the following schema:
.. code-block:: sql
CREATE TABLE [_analyze_tables_] (
[table] TEXT,
[column] TEXT,
[total_rows] INTEGER,
[num_null] INTEGER,
[num_blank] INTEGER,
[num_distinct] INTEGER,
[most_common] TEXT,
[least_common] TEXT,
PRIMARY KEY ([table], [column])
);
The ``most_common`` and ``least_common`` columns will contain nested JSON arrays of the most common and least common values that look like this:
.. code-block:: json
[
["Del Libertador, Av", 5068],
["Alberdi Juan Bautista Av.", 4612],
["Directorio Av.", 4552],
["Rivadavia, Av", 4532],
["Yerbal", 4512],
["Cosquín", 4472],
["Estado Plurinacional de Bolivia", 4440],
["Gordillo Timoteo", 4424],
["Montiel", 4360],
["Condarco", 4288]
]
.. _cli_create_database:
Creating an empty database
==========================
You can create a new empty database file using the ``create-database`` command:
.. code-block:: bash
sqlite-utils create-database empty.db
To enable :ref:`cli_wal` on the newly created database add the ``--enable-wal`` option:
.. code-block:: bash
sqlite-utils create-database empty.db --enable-wal
To enable SpatiaLite metadata on a newly created database, add the ``--init-spatialite`` flag:
.. code-block:: bash
sqlite-utils create-database empty.db --init-spatialite
That will look for SpatiaLite in a set of predictable locations. To load it from somewhere else, use the ``--load-extension`` option:
.. code-block:: bash
sqlite-utils create-database empty.db --init-spatialite --load-extension /path/to/spatialite.so
.. _cli_inserting_data:
Inserting JSON data
===================
If you have data as JSON, you can use ``sqlite-utils insert tablename`` to insert it into a database. The table will be created with the correct (automatically detected) columns if it does not already exist.
You can pass in a single JSON object or a list of JSON objects, either as a filename or piped directly to standard-in (by using ``-`` as the filename).
Here's the simplest possible example:
.. code-block:: bash
echo '{"name": "Cleo", "age": 4}' | sqlite-utils insert dogs.db dogs -
To specify a column as the primary key, use ``--pk=column_name``.
To create a compound primary key across more than one column, use ``--pk`` multiple times.
If you feed it a JSON list it will insert multiple records. For example, if ``dogs.json`` looks like this:
.. code-block:: json
[
{
"id": 1,
"name": "Cleo",
"age": 4
},
{
"id": 2,
"name": "Pancakes",
"age": 2
},
{
"id": 3,
"name": "Toby",
"age": 6
}
]
You can import all three records into an automatically created ``dogs`` table and set the ``id`` column as the primary key like so:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.json --pk=id
Pass ``--pk`` multiple times to define a compound primary key.
You can skip inserting any records that have a primary key that already exists using ``--ignore``:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.json --pk=id --ignore
You can delete all the existing rows in the table before inserting the new records using ``--truncate``:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.json --truncate
You can add the ``--analyze`` option to run ``ANALYZE`` against the table after the rows have been inserted.
.. _cli_inserting_data_binary:
Inserting binary data
---------------------
You can insert binary data into a BLOB column by first encoding it using base64 and then structuring it like this:
.. code-block:: json
[
{
"name": "transparent.gif",
"content": {
"$base64": true,
"encoded": "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
}
}
]
.. _cli_inserting_data_nl_json:
Inserting newline-delimited JSON
--------------------------------
You can also import newline-delimited JSON (see `JSON Lines `__) using the ``--nl`` option:
.. code-block:: bash
echo '{"id": 1, "name": "Cleo"}
{"id": 2, "name": "Suna"}' | sqlite-utils insert creatures.db creatures - --nl
Newline-delimited JSON consists of full JSON objects separated by newlines.
If you are processing data using ``jq`` you can use the ``jq -c`` option to output valid newline-delimited JSON.
Since `Datasette `__ can export newline-delimited JSON, you can combine Datasette and ``sqlite-utils`` like so:
.. code-block:: bash
curl -L "https://latest.datasette.io/fixtures/facetable.json?_shape=array&_nl=on" \
| sqlite-utils insert nl-demo.db facetable - --pk=id --nl
You can also pipe ``sqlite-utils`` together to create a new SQLite database file containing the results of a SQL query against another database:
.. code-block:: bash
sqlite-utils sf-trees.db \
"select TreeID, qAddress, Latitude, Longitude from Street_Tree_List" --nl \
| sqlite-utils insert saved.db trees - --nl
.. code-block:: bash
sqlite-utils saved.db "select * from trees limit 5" --csv
.. code-block:: output
TreeID,qAddress,Latitude,Longitude
141565,501X Baker St,37.7759676911831,-122.441396661871
232565,940 Elizabeth St,37.7517102172731,-122.441498017841
119263,495X Lakeshore Dr,,
207368,920 Kirkham St,37.760210314285,-122.47073935813
188702,1501 Evans Ave,37.7422086702947,-122.387293152263
.. _cli_inserting_data_flatten:
Flattening nested JSON objects
------------------------------
``sqlite-utils insert`` and ``sqlite-utils memory`` both expect incoming JSON data to consist of an array of JSON objects, where the top-level keys of each object will become columns in the created database table.
If your data is nested you can use the ``--flatten`` option to create columns that are derived from the nested data.
Consider this example document, in a file called ``log.json``:
.. code-block:: json
{
"httpRequest": {
"latency": "0.112114537s",
"requestMethod": "GET",
"requestSize": "534",
"status": 200
},
"insertId": "6111722f000b5b4c4d4071e2",
"labels": {
"service": "datasette-io"
}
}
Inserting this into a table using ``sqlite-utils insert logs.db logs log.json`` will create a table with the following schema:
.. code-block:: sql
CREATE TABLE [logs] (
[httpRequest] TEXT,
[insertId] TEXT,
[labels] TEXT
);
With the ``--flatten`` option columns will be created using ``topkey_nextkey`` column names - so running ``sqlite-utils insert logs.db logs log.json --flatten`` will create the following schema instead:
.. code-block:: sql
CREATE TABLE [logs] (
[httpRequest_latency] TEXT,
[httpRequest_requestMethod] TEXT,
[httpRequest_requestSize] TEXT,
[httpRequest_status] INTEGER,
[insertId] TEXT,
[labels_service] TEXT
);
.. _cli_insert_csv_tsv:
Inserting CSV or TSV data
=========================
If your data is in CSV format, you can insert it using the ``--csv`` option:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.csv --csv
For tab-delimited data, use ``--tsv``:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.tsv --tsv
Data is expected to be encoded as Unicode UTF-8. If your data is in another character encoding you can specify it using the ``--encoding`` option:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.tsv --tsv --encoding=latin-1
To stop inserting after a specified number of records - useful for getting a faster preview of a large file - use the ``--stop-after`` option:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.csv --csv --stop-after=10
A progress bar is displayed when inserting data from a file. You can hide the progress bar using the ``--silent`` option.
By default every column inserted from a CSV or TSV file will be of type ``TEXT``. To automatically detect column types - resulting in a mix of ``TEXT``, ``INTEGER`` and ``FLOAT`` columns, use the ``--detect-types`` option (or its shortcut ``-d``).
For example, given a ``creatures.csv`` file containing this:
.. code-block::
name,age,weight
Cleo,6,45.5
Dori,1,3.5
The following command:
.. code-block:: bash
sqlite-utils insert creatures.db creatures creatures.csv --csv --detect-types
Will produce this schema:
.. code-block:: bash
sqlite-utils schema creatures.db
.. code-block:: output
CREATE TABLE "creatures" (
[name] TEXT,
[age] INTEGER,
[weight] FLOAT
);
You can set the ``SQLITE_UTILS_DETECT_TYPES`` environment variable if you want ``--detect-types`` to be the default behavior:
.. code-block:: bash
export SQLITE_UTILS_DETECT_TYPES=1
If a CSV or TSV file includes empty cells, like this one:
::
name,age,weight
Cleo,6,
Dori,,3.5
They will be imported into SQLite as empty string values, ``""``.
To import them as ``NULL`` values instead, use the ``--empty-null`` option:
.. code-block:: bash
sqlite-utils insert creatures.db creatures creatures.csv --csv --empty-null
.. _cli_insert_csv_tsv_delimiter:
Alternative delimiters and quote characters
-------------------------------------------
If your file uses a delimiter other than ``,`` or a quote character other than ``"`` you can attempt to detect delimiters or you can specify them explicitly.
The ``--sniff`` option can be used to attempt to detect the delimiters:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.csv --sniff
Alternatively, you can specify them using the ``--delimiter`` and ``--quotechar`` options.
Here's a CSV file that uses ``;`` for delimiters and the ``|`` symbol for quote characters::
name;description
Cleo;|Very fine; a friendly dog|
Pancakes;A local corgi
You can import that using:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.csv --delimiter=";" --quotechar="|"
Passing ``--delimiter``, ``--quotechar`` or ``--sniff`` implies ``--csv``, so you can omit the ``--csv`` option.
.. _cli_insert_csv_tsv_no_header:
CSV files without a header row
------------------------------
The first row of any CSV or TSV file is expected to contain the names of the columns in that file.
If your file does not include this row, you can use the ``--no-headers`` option to specify that the tool should not use that first row as headers.
If you do this, the table will be created with column names called ``untitled_1`` and ``untitled_2`` and so on. You can then rename them using the ``sqlite-utils transform ... --rename`` command, see :ref:`cli_transform_table`.
.. _cli_insert_unstructured:
Inserting unstructured data with \-\-lines and \-\-text
=======================================================
If you have an unstructured file you can insert its contents into a table with a single ``line`` column containing each line from the file using ``--lines``. This can be useful if you intend to further analyze those lines using SQL string functions or :ref:`sqlite-utils convert <cli_convert>`:
.. code-block:: bash
sqlite-utils insert logs.db loglines logfile.log --lines
This will produce the following schema:
.. code-block:: sql
CREATE TABLE [loglines] (
[line] TEXT
);
You can also insert the entire contents of the file into a single column called ``text`` using ``--text``:
.. code-block:: bash
sqlite-utils insert content.db content file.txt --text
The schema here will be:
.. code-block:: sql
CREATE TABLE [content] (
[text] TEXT
);
.. _cli_insert_convert:
Applying conversions while inserting data
=========================================
The ``--convert`` option can be used to apply a Python conversion function to imported data before it is inserted into the database. It works in a similar way to :ref:`sqlite-utils convert <cli_convert>`.
Your Python function will be passed a dictionary called ``row`` for each item that is being imported. You can modify that dictionary and return it - or return a fresh dictionary - to change the data that will be inserted.
Given a JSON file called ``dogs.json`` containing this:
.. code-block:: json
[
{"id": 1, "name": "Cleo"},
{"id": 2, "name": "Pancakes"}
]
The following command will insert that data and add an ``is_good`` column set to ``1`` for each dog:
.. code-block:: bash
sqlite-utils insert dogs.db dogs dogs.json --convert 'row["is_good"] = 1'
The ``--convert`` option also works with the ``--csv``, ``--tsv`` and ``--nl`` insert options.
As with ``sqlite-utils convert`` you can use ``--import`` to import additional Python modules, see :ref:`cli_convert_import` for details.
You can also pass code that runs some initialization steps and defines a ``convert(value)`` function, see :ref:`cli_convert_complex`.
.. _cli_insert_convert_lines:
\-\-convert with \-\-lines
--------------------------
Things work slightly differently when combined with the ``--lines`` or ``--text`` options.
With ``--lines``, instead of being passed a ``row`` dictionary your function will be passed a ``line`` string representing each line of the input. Given a file called ``access.log`` containing the following::
INFO: 127.0.0.1:60581 - GET / HTTP/1.1 200 OK
INFO: 127.0.0.1:60581 - GET /foo/-/static/app.css?cead5a HTTP/1.1 200 OK
You could convert it into structured data like so:
.. code-block:: bash
sqlite-utils insert logs.db loglines access.log --convert '
type, source, _, verb, path, _, status, _ = line.split()
return {
"type": type,
"source": source,
"verb": verb,
"path": path,
"status": status,
}' --lines
The resulting table would look like this:
====== =============== ====== ============================ ========
type source verb path status
====== =============== ====== ============================ ========
INFO: 127.0.0.1:60581 GET / 200
INFO: 127.0.0.1:60581 GET /foo/-/static/app.css?cead5a 200
====== =============== ====== ============================ ========
.. _cli_insert_convert_text:
\-\-convert with \-\-text
-------------------------
With ``--text`` the entire input to the command will be made available to the function as a variable called ``text``.
The function can return a single dictionary which will be inserted as a single row, or it can return a list or iterator of dictionaries, each of which will be inserted.
Here's how to use ``--convert`` and ``--text`` to insert one record per word in the input:
.. code-block:: bash
echo 'A bunch of words' | sqlite-utils insert words.db words - \
--text --convert '({"word": w} for w in text.split())'
The result looks like this:
.. code-block:: bash
sqlite-utils dump words.db
.. code-block:: output
BEGIN TRANSACTION;
CREATE TABLE [words] (
[word] TEXT
);
INSERT INTO "words" VALUES('A');
INSERT INTO "words" VALUES('bunch');
INSERT INTO "words" VALUES('of');
INSERT INTO "words" VALUES('words');
COMMIT;
.. _cli_insert_replace:
Insert-replacing data
=====================
The ``--replace`` option to ``insert`` causes any existing records with the same primary key to be replaced entirely by the new records.
To replace a dog with an ID of 2 with a new record, run the following:
.. code-block:: bash
echo '{"id": 2, "name": "Pancakes", "age": 3}' | \
sqlite-utils insert dogs.db dogs - --pk=id --replace
.. _cli_upsert:
Upserting data
==============
Upserting is update-or-insert. If a row exists with the specified primary key the provided columns will be updated. If no row exists that row will be created.
Unlike ``insert --replace``, an upsert will ignore any column values that exist but are not present in the upsert document.
For example:
.. code-block:: bash
echo '{"id": 2, "age": 4}' | \
sqlite-utils upsert dogs.db dogs - --pk=id
This will update the dog with an ID of 2 to have an age of 4, creating a new record (with a null name) if one does not exist. If a row DOES exist the name will be left as-is.
The command will fail if you reference columns that do not exist on the table. To automatically create missing columns, use the ``--alter`` option.
.. note::
``upsert`` in sqlite-utils 1.x worked like ``insert ... --replace`` does in 2.x. See `issue #66 `__ for details of this change.
.. _cli_bulk:
Executing SQL in bulk
=====================
If you have a JSON, newline-delimited JSON, CSV or TSV file you can execute a bulk SQL query using each of the records in that file using the ``sqlite-utils bulk`` command.
The command takes the database file, the SQL to be executed and the file containing records to be used when evaluating the SQL query.
The SQL query should include ``:named`` parameters that match the keys in the records.
For example, given a ``chickens.csv`` CSV file containing the following:
.. code-block::
id,name
1,Blue
2,Snowy
3,Azi
4,Lila
5,Suna
6,Cardi
You could insert those rows into a pre-created ``chickens`` table like so:
.. code-block:: bash
sqlite-utils bulk chickens.db \
'insert into chickens (id, name) values (:id, :name)' \
chickens.csv --csv
This command takes the same options as the ``sqlite-utils insert`` command - so it defaults to expecting JSON but can accept other formats using ``--csv`` or ``--tsv`` or ``--nl`` or other options described above.
By default all of the SQL queries will be executed in a single transaction. To commit every 20 records, use ``--batch-size 20``.
.. _cli_insert_files:
Inserting data from files
=========================
The ``insert-files`` command can be used to insert the content of files, along with their metadata, into a SQLite table.
Here's an example that inserts all of the GIF files in the current directory into a ``gifs.db`` database, placing the file contents in an ``images`` table:
.. code-block:: bash
sqlite-utils insert-files gifs.db images *.gif
You can also pass one or more directories, in which case every file in those directories will be added recursively:
.. code-block:: bash
sqlite-utils insert-files gifs.db images path/to/my-gifs
By default this command will create a table with the following schema:
.. code-block:: sql
CREATE TABLE [images] (
[path] TEXT PRIMARY KEY,
[content] BLOB,
[size] INTEGER
);
Content will be treated as binary by default and stored in a ``BLOB`` column. You can use the ``--text`` option to store that content in a ``TEXT`` column instead.
You can customize the schema using one or more ``-c`` options. For a table schema that includes just the path, MD5 hash and last modification time of the file, you would use this:
.. code-block:: bash
sqlite-utils insert-files gifs.db images *.gif -c path -c md5 -c mtime --pk=path
This will result in the following schema:
.. code-block:: sql
CREATE TABLE [images] (
[path] TEXT PRIMARY KEY,
[md5] TEXT,
[mtime] FLOAT
);
Note that there's no ``content`` column here at all - if you specify custom columns using ``-c`` you need to include ``-c content`` to create that column.
You can change the name of one of these columns using a ``-c colname:coldef`` parameter. To rename the ``mtime`` column to ``last_modified`` you would use this:
.. code-block:: bash
sqlite-utils insert-files gifs.db images *.gif \
-c path -c md5 -c last_modified:mtime --pk=path
You can pass ``--replace`` or ``--upsert`` to indicate what should happen if you try to insert a file with an existing primary key. Pass ``--alter`` to cause any missing columns to be added to the table.
The full list of column definitions you can use is as follows:
``name``
The name of the file, e.g. ``cleo.jpg``
``path``
The path to the file relative to the root folder, e.g. ``pictures/cleo.jpg``
``fullpath``
The fully resolved path to the file, e.g. ``/home/simonw/pictures/cleo.jpg``
``sha256``
The SHA256 hash of the file contents
``md5``
The MD5 hash of the file contents
``mode``
The permission bits of the file, as an integer - you may want to convert this to octal
``content``
The binary file contents, which will be stored as a BLOB
``content_text``
The text file contents, which will be stored as TEXT
``mtime``
The modification time of the file, as floating point seconds since the Unix epoch
``ctime``
The creation time of the file, as floating point seconds since the Unix epoch
``mtime_int``
The modification time as an integer rather than a float
``ctime_int``
The creation time as an integer rather than a float
``mtime_iso``
The modification time as an ISO timestamp, e.g. ``2020-07-27T04:24:06.654246``
``ctime_iso``
The creation time as an ISO timestamp
``size``
The integer size of the file in bytes
``stem``
The filename without the extension - for ``file.txt.gz`` this would be ``file.txt``
``suffix``
The file extension - for ``file.txt.gz`` this would be ``.gz``
You can insert data piped from standard input like this:
.. code-block:: bash
cat dog.jpg | sqlite-utils insert-files dogs.db pics - --name=dog.jpg
The ``-`` argument indicates data should be read from standard input. The string passed using the ``--name`` option will be used for the file name and path values.
When inserting data from standard input only the following column definitions are supported: ``name``, ``path``, ``content``, ``content_text``, ``sha256``, ``md5`` and ``size``.
.. _cli_convert:
Converting data in columns
==========================
The ``convert`` command can be used to transform the data in a specified column - for example to parse a date string into an ISO timestamp, or to split a string of tags into a JSON array.
The command accepts a database, table, one or more columns and a string of Python code to be executed against the values from those columns. The following example would replace the values in the ``headline`` column in the ``articles`` table with an upper-case version:
.. code-block:: bash
sqlite-utils convert content.db articles headline 'value.upper()'
The Python code is passed as a string. Within that Python code the ``value`` variable will be the value of the current column.
The code you provide will be compiled into a function that takes ``value`` as a single argument. If you break your function body into multiple lines the last line should be a ``return`` statement:
.. code-block:: bash
sqlite-utils convert content.db articles headline '
value = str(value)
return value.upper()'
Your code will be automatically wrapped in a function, but you can also define a function called ``convert(value)`` which will be called, if available:
.. code-block:: bash
sqlite-utils convert content.db articles headline '
def convert(value):
return value.upper()'
Use a ``CODE`` value of ``-`` to read from standard input:
.. code-block:: bash
cat mycode.py | sqlite-utils convert content.db articles headline -
Where ``mycode.py`` contains a fragment of Python code that looks like this:
.. code-block:: python
def convert(value):
return value.upper()
The conversion will be applied to every row in the specified table. You can limit that to just rows that match a ``WHERE`` clause using ``--where``:
.. code-block:: bash
sqlite-utils convert content.db articles headline 'value.upper()' \
--where "headline like '%cat%'"
You can include named parameters in your where clause and populate them using one or more ``--param`` options:
.. code-block:: bash
sqlite-utils convert content.db articles headline 'value.upper()' \
--where "headline like :query" \
--param query '%cat%'
The ``--dry-run`` option will output a preview of the conversion against the first ten rows, without modifying the database.
By default any rows with a falsey value for the column - such as ``0`` or ``null`` - will be skipped. Use the ``--no-skip-false`` option to disable this behaviour.
.. _cli_convert_import:
Importing additional modules
----------------------------
You can specify Python modules that should be imported and made available to your code using one or more ``--import`` options. This example uses the ``textwrap`` module to wrap the ``content`` column at 100 characters:
.. code-block:: bash
sqlite-utils convert content.db articles content \
'"\n".join(textwrap.wrap(value, 100))' \
--import=textwrap
This supports nested imports as well, for example to use `ElementTree `__:
.. code-block:: bash
sqlite-utils convert content.db articles content \
'xml.etree.ElementTree.fromstring(value).attrib["title"]' \
--import=xml.etree.ElementTree
.. _cli_convert_debugger:
Using the debugger
------------------
If an error occurs while running your conversion operation you may see a message like this::
user-defined function raised exception
Add the ``--pdb`` option to catch the error and open the Python debugger at that point. The conversion operation will exit after you type ``q`` in the debugger.
Here's an example debugging session. First, create an ``articles`` table with invalid XML in the ``content`` column:
.. code-block:: bash
echo '{"content": "This is not XML"}' | sqlite-utils insert content.db articles -
Now run the conversion with the ``--pdb`` option:
.. code-block:: bash
sqlite-utils convert content.db articles content \
'xml.etree.ElementTree.fromstring(value).attrib["title"]' \
--import=xml.etree.ElementTree \
--pdb
When the error occurs the debugger will open::
Exception raised, dropping into pdb...: syntax error: line 1, column 0
> .../python3.11/xml/etree/ElementTree.py(1338)XML()
-> parser.feed(text)
(Pdb) args
text = 'This is not XML'
parser =
(Pdb) q
``args`` here shows the arguments to the current function in the stack. The Python `pdb documentation `__ has full details on the other available commands.
.. _cli_convert_complex:
Defining a convert() function
-----------------------------
Instead of providing a single line of code to be executed against each value, you can define a function called ``convert(value)``.
This mechanism can be used to execute one-off initialization code that runs once at the start of the conversion run.
The following example adds a new ``score`` column, then updates it to list a random number - after first seeding the random number generator to ensure that multiple runs produce the same results:
.. code-block:: bash
sqlite-utils add-column content.db articles score float --not-null-default 1.0
sqlite-utils convert content.db articles score '
import random
random.seed(10)
def convert(value):
return random.random()
'
.. _cli_convert_recipes:
sqlite-utils convert recipes
----------------------------
Various built-in recipe functions are available for common operations. These are:
``r.jsonsplit(value, delimiter=',', type=str)``
Convert a string like ``a,b,c`` into a JSON array ``["a", "b", "c"]``
The ``delimiter`` parameter can be used to specify a different delimiter.
The ``type`` parameter can be set to ``float`` or ``int`` to produce a JSON array of different types, for example if the column's string value was ``1.2,3,4.5`` the following::
r.jsonsplit(value, type=float)
Would produce an array like this: ``[1.2, 3.0, 4.5]``
``r.parsedate(value, dayfirst=False, yearfirst=False, errors=None)``
Parse a date and convert it to ISO date format: ``yyyy-mm-dd``
In the case of dates such as ``03/04/05`` U.S. ``MM/DD/YY`` format is assumed - you can use ``dayfirst=True`` or ``yearfirst=True`` to change how these ambiguous dates are interpreted.
Use the ``errors=`` parameter to specify what should happen if a value cannot be parsed.
By default, if any value cannot be parsed an error will be raised and all values will be left as they were.
Set ``errors=r.IGNORE`` to ignore any values that cannot be parsed, leaving them unchanged.
Set ``errors=r.SET_NULL`` to set any values that cannot be parsed to ``null``.
``r.parsedatetime(value, dayfirst=False, yearfirst=False, errors=None)``
Parse a datetime and convert it to ISO datetime format: ``yyyy-mm-ddTHH:MM:SS``
These recipes can be used in the code passed to ``sqlite-utils convert`` like this:
.. code-block:: bash
sqlite-utils convert my.db mytable mycolumn \
'r.jsonsplit(value)'
To use any of the documented parameters, do this:
.. code-block:: bash
sqlite-utils convert my.db mytable mycolumn \
'r.jsonsplit(value, delimiter=":")'
.. _cli_convert_output:
Saving the result to a different column
---------------------------------------
The ``--output`` and ``--output-type`` options can be used to save the result of the conversion to a separate column, which will be created if that column does not already exist:
.. code-block:: bash
sqlite-utils convert content.db articles headline 'value.upper()' \
--output headline_upper
The type of the created column defaults to ``text``, but a different column type can be specified using ``--output-type``. This example will create a new floating point column called ``id_as_a_float`` with a copy of each item's ID increased by 0.5:
.. code-block:: bash
sqlite-utils convert content.db articles id 'float(value) + 0.5' \
--output id_as_a_float \
--output-type float
You can drop the original column at the end of the operation by adding ``--drop``.
.. _cli_convert_multi:
Converting a column into multiple columns
-----------------------------------------
Sometimes you may wish to convert a single column into multiple derived columns. For example, you may have a ``location`` column containing ``latitude,longitude`` values which you wish to split out into separate ``latitude`` and ``longitude`` columns.
You can achieve this using the ``--multi`` option to ``sqlite-utils convert``. This option expects your Python code to return a Python dictionary: new columns will be created and populated for each of the keys in that dictionary.
For the ``latitude,longitude`` example you would use the following:
.. code-block:: bash
sqlite-utils convert demo.db places location \
'bits = value.split(",")
return {
"latitude": float(bits[0]),
"longitude": float(bits[1]),
}' --multi
The type of the returned values will be taken into account when creating the new columns. In this example, the resulting database schema will look like this:
.. code-block:: sql
CREATE TABLE [places] (
[location] TEXT,
[latitude] FLOAT,
[longitude] FLOAT
);
The code function can also return ``None``, in which case its output will be ignored. You can drop the original column at the end of the operation by adding ``--drop``.
.. _cli_create_table:
Creating tables
===============
Most of the time creating tables by inserting example data is the quickest approach. If you need to create an empty table in advance of inserting data you can do so using the ``create-table`` command:
.. code-block:: bash
sqlite-utils create-table mydb.db mytable id integer name text --pk=id
This will create a table called ``mytable`` with two columns - an integer ``id`` column and a text ``name`` column. It will set the ``id`` column to be the primary key.
You can pass as many column-name column-type pairs as you like. Valid types are ``integer``, ``text``, ``float`` and ``blob``.
Pass ``--pk`` more than once for a compound primary key that covers multiple columns.
You can specify columns that should be NOT NULL using ``--not-null colname``. You can specify default values for columns using ``--default colname defaultvalue``.
.. code-block:: bash
sqlite-utils create-table mydb.db mytable \
id integer \
name text \
age integer \
is_good integer \
--not-null name \
--not-null age \
--default is_good 1 \
--pk=id
.. code-block:: bash
sqlite-utils tables mydb.db --schema -t
.. code-block:: output
table schema
------- --------------------------------
mytable CREATE TABLE [mytable] (
[id] INTEGER PRIMARY KEY,
[name] TEXT NOT NULL,
[age] INTEGER NOT NULL,
[is_good] INTEGER DEFAULT '1'
)
You can specify foreign key relationships between the tables you are creating using ``--fk colname othertable othercolumn``:
.. code-block:: bash
sqlite-utils create-table books.db authors \
id integer \
name text \
--pk=id
sqlite-utils create-table books.db books \
id integer \
title text \
author_id integer \
--pk=id \
--fk author_id authors id
.. code-block:: bash
sqlite-utils tables books.db --schema -t
.. code-block:: output
table schema
------- -------------------------------------------------
authors CREATE TABLE [authors] (
[id] INTEGER PRIMARY KEY,
[name] TEXT
)
books CREATE TABLE [books] (
[id] INTEGER PRIMARY KEY,
[title] TEXT,
[author_id] INTEGER REFERENCES [authors]([id])
)
You can create a table in `SQLite STRICT mode <https://www.sqlite.org/stricttables.html>`__ using ``--strict``:
.. code-block:: bash
sqlite-utils create-table mydb.db mytable id integer name text --strict
.. code-block:: bash
sqlite-utils tables mydb.db --schema -t
.. code-block:: output
table schema
------- ------------------------
mytable CREATE TABLE [mytable] (
[id] INTEGER,
[name] TEXT
) STRICT
If a table with the same name already exists, you will get an error. You can choose to silently ignore this error with ``--ignore``, or you can replace the existing table with a new, empty table using ``--replace``.
You can also pass ``--transform`` to transform the existing table to match the new schema. See :ref:`python_api_explicit_create` in the Python library documentation for details of how this option works.
.. _cli_renaming_tables:
Renaming a table
================
You can rename a table using the ``rename-table`` command:
.. code-block:: bash
sqlite-utils rename-table mydb.db oldname newname
Pass ``--ignore`` to ignore any errors caused by the table not existing, or the new name already being in use.
.. _cli_duplicate_table:
Duplicating tables
==================
The ``duplicate`` command duplicates a table - creating a new table with the same schema and a copy of all of the rows:
.. code-block:: bash
sqlite-utils duplicate books.db authors authors_copy
.. _cli_drop_table:
Dropping tables
===============
You can drop a table using the ``drop-table`` command:
.. code-block:: bash
sqlite-utils drop-table mydb.db mytable
Use ``--ignore`` to ignore the error if the table does not exist.
.. _cli_transform_table:
Transforming tables
===================
The ``transform`` command allows you to apply complex transformations to a table that cannot be implemented using a regular SQLite ``ALTER TABLE`` command. See :ref:`python_api_transform` for details of how this works. The ``transform`` command preserves a table's ``STRICT`` mode.
.. code-block:: bash
sqlite-utils transform mydb.db mytable \
--drop column1 \
--rename column2 column_renamed
Every option for this command (with the exception of ``--pk-none``) can be specified multiple times. The options are as follows:
``--type column-name new-type``
Change the type of the specified column. Valid types are ``integer``, ``text``, ``float``, ``blob``.
``--drop column-name``
Drop the specified column.
``--rename column-name new-name``
Rename this column to a new name.
``--column-order column``
Use this multiple times to specify a new order for your columns. ``-o`` shortcut is also available.
``--not-null column-name``
Set this column as ``NOT NULL``.
``--not-null-false column-name``
For a column that is currently set as ``NOT NULL``, remove the ``NOT NULL``.
``--pk column-name``
Change the primary key column for this table. Pass ``--pk`` multiple times if you want to create a compound primary key.
``--pk-none``
Remove the primary key from this table, turning it into a ``rowid`` table.
``--default column-name value``
Set the default value of this column.
``--default-none column``
Remove the default value for this column.
``--drop-foreign-key column``
Drop the specified foreign key.
``--add-foreign-key column other_table other_column``
Add a foreign key constraint to ``column`` pointing to ``other_table.other_column``.
If you want to see the SQL that will be executed to make the change without actually executing it, add the ``--sql`` flag. For example:
.. code-block:: bash
sqlite-utils transform fixtures.db roadside_attractions \
--rename pk id \
--default name Untitled \
--column-order id \
--column-order longitude \
--column-order latitude \
--drop address \
--sql
.. code-block:: output
CREATE TABLE [roadside_attractions_new_4033a60276b9] (
[id] INTEGER PRIMARY KEY,
[longitude] FLOAT,
[latitude] FLOAT,
[name] TEXT DEFAULT 'Untitled'
);
INSERT INTO [roadside_attractions_new_4033a60276b9] ([longitude], [latitude], [id], [name])
SELECT [longitude], [latitude], [pk], [name] FROM [roadside_attractions];
DROP TABLE [roadside_attractions];
ALTER TABLE [roadside_attractions_new_4033a60276b9] RENAME TO [roadside_attractions];
.. _cli_transform_table_add_primary_key_to_rowid:
Adding a primary key to a rowid table
-------------------------------------
SQLite tables that are created without an explicit primary key are created as `rowid tables <https://www.sqlite.org/rowidtable.html>`__. They still have a numeric primary key which is available in the ``rowid`` column, but that column is not included in the output of ``select *``. Here's an example:
.. code-block:: bash
echo '[{"name": "Azi"}, {"name": "Suna"}]' | \
sqlite-utils insert chickens.db chickens -
sqlite-utils schema chickens.db
.. code-block:: output
CREATE TABLE [chickens] (
[name] TEXT
);
.. code-block:: bash
sqlite-utils chickens.db 'select * from chickens'
.. code-block:: output
[{"name": "Azi"},
{"name": "Suna"}]
.. code-block:: bash
sqlite-utils chickens.db 'select rowid, * from chickens'
.. code-block:: output
[{"rowid": 1, "name": "Azi"},
{"rowid": 2, "name": "Suna"}]
You can use ``sqlite-utils transform ... --pk id`` to add a primary key column called ``id`` to the table. The primary key will be created as an ``INTEGER PRIMARY KEY`` and the existing ``rowid`` values will be copied across to it. It will automatically increment as new rows are added to the table:
.. code-block:: bash
sqlite-utils transform chickens.db chickens --pk id
.. code-block:: bash
sqlite-utils schema chickens.db
.. code-block:: output
CREATE TABLE "chickens" (
[id] INTEGER PRIMARY KEY,
[name] TEXT
);
.. code-block:: bash
sqlite-utils chickens.db 'select * from chickens'
.. code-block:: output
[{"id": 1, "name": "Azi"},
{"id": 2, "name": "Suna"}]
.. code-block:: bash
echo '{"name": "Cardi"}' | sqlite-utils insert chickens.db chickens -
.. code-block:: bash
sqlite-utils chickens.db 'select * from chickens'
.. code-block:: output
[{"id": 1, "name": "Azi"},
{"id": 2, "name": "Suna"},
{"id": 3, "name": "Cardi"}]
.. _cli_extract:
Extracting columns into a separate table
========================================
The ``sqlite-utils extract`` command can be used to extract specified columns into a separate table.
Take a look at the Python API documentation for :ref:`python_api_extract` for a detailed description of how this works, including examples of table schemas before and after running an extraction operation.
The command takes a database, table and one or more columns that should be extracted. To extract the ``species`` column from the ``trees`` table you would run:
.. code-block:: bash
sqlite-utils extract my.db trees species
This would produce the following schema:
.. code-block:: sql
CREATE TABLE "trees" (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[species_id] INTEGER,
FOREIGN KEY(species_id) REFERENCES species(id)
);
CREATE TABLE [species] (
[id] INTEGER PRIMARY KEY,
[species] TEXT
);
CREATE UNIQUE INDEX [idx_species_species]
ON [species] ([species]);
The command takes the following options:
``--table TEXT``
The name of the lookup table to extract columns to. This defaults to using the name of the columns that are being extracted.
``--fk-column TEXT``
The name of the foreign key column to add to the table. Defaults to ``columnname_id``.
``--rename <TEXT TEXT>...``
Use this option to rename the columns created in the new lookup table.
``--silent``
Don't display the progress bar.
Here's a more complex example that makes use of these options. It converts `this CSV file <https://github.com/wri/global-power-plant-database/blob/232a6666/output_database/global_power_plant_database.csv>`__ full of global power plants into SQLite, then extracts the ``country`` and ``country_long`` columns into a separate ``countries`` table:
.. code-block:: bash
wget 'https://github.com/wri/global-power-plant-database/blob/232a6666/output_database/global_power_plant_database.csv?raw=true'
sqlite-utils insert global.db power_plants \
'global_power_plant_database.csv?raw=true' --csv
# Extract those columns:
sqlite-utils extract global.db power_plants country country_long \
--table countries \
--fk-column country_id \
--rename country_long name
After running the above, the command ``sqlite-utils schema global.db`` reveals the following schema:
.. code-block:: sql
CREATE TABLE [countries] (
[id] INTEGER PRIMARY KEY,
[country] TEXT,
[name] TEXT
);
CREATE TABLE "power_plants" (
[country_id] INTEGER,
[name] TEXT,
[gppd_idnr] TEXT,
[capacity_mw] TEXT,
[latitude] TEXT,
[longitude] TEXT,
[primary_fuel] TEXT,
[other_fuel1] TEXT,
[other_fuel2] TEXT,
[other_fuel3] TEXT,
[commissioning_year] TEXT,
[owner] TEXT,
[source] TEXT,
[url] TEXT,
[geolocation_source] TEXT,
[wepp_id] TEXT,
[year_of_capacity_data] TEXT,
[generation_gwh_2013] TEXT,
[generation_gwh_2014] TEXT,
[generation_gwh_2015] TEXT,
[generation_gwh_2016] TEXT,
[generation_gwh_2017] TEXT,
[generation_data_source] TEXT,
[estimated_generation_gwh] TEXT,
FOREIGN KEY([country_id]) REFERENCES [countries]([id])
);
CREATE UNIQUE INDEX [idx_countries_country_name]
ON [countries] ([country], [name]);
.. _cli_create_view:
Creating views
==============
You can create a view using the ``create-view`` command:
.. code-block:: bash
sqlite-utils create-view mydb.db version "select sqlite_version()"
.. code-block:: bash
sqlite-utils mydb.db "select * from version"
.. code-block:: output
[{"sqlite_version()": "3.31.1"}]
Use ``--replace`` to replace an existing view of the same name, and ``--ignore`` to do nothing if a view already exists.
.. _cli_drop_view:
Dropping views
==============
You can drop a view using the ``drop-view`` command:
.. code-block:: bash
sqlite-utils drop-view mydb.db myview
Use ``--ignore`` to ignore the error if the view does not exist.
.. _cli_add_column:
Adding columns
==============
You can add a column using the ``add-column`` command:
.. code-block:: bash
sqlite-utils add-column mydb.db mytable nameofcolumn text
The last argument here is the type of the column to be created. This can be one of:
- ``text`` or ``str``
- ``integer`` or ``int``
- ``float``
- ``blob`` or ``bytes``
This argument is optional and defaults to ``text``.
You can add a column that is a foreign key reference to another table using the ``--fk`` option:
.. code-block:: bash
sqlite-utils add-column mydb.db dogs species_id --fk species
This will automatically detect the name of the primary key on the species table and use that (and its type) for the new column.
You can explicitly specify the column you wish to reference using ``--fk-col``:
.. code-block:: bash
sqlite-utils add-column mydb.db dogs species_id --fk species --fk-col ref
You can set a ``NOT NULL DEFAULT 'x'`` constraint on the new column using ``--not-null-default``:
.. code-block:: bash
sqlite-utils add-column mydb.db dogs friends_count integer --not-null-default 0
.. _cli_add_column_alter:
Adding columns automatically on insert/update
=============================================
You can use the ``--alter`` option to automatically add new columns if the data you are inserting or upserting is of a different shape:
.. code-block:: bash
sqlite-utils insert dogs.db dogs new-dogs.json --pk=id --alter
.. _cli_add_foreign_key:
Adding foreign key constraints
==============================
The ``add-foreign-key`` command can be used to add new foreign key references to an existing table - something which SQLite's ``ALTER TABLE`` command does not support.
To add a foreign key constraint pointing the ``books.author_id`` column to ``authors.id`` in another table, do this:
.. code-block:: bash
sqlite-utils add-foreign-key books.db books author_id authors id
If you omit the other table and other column references ``sqlite-utils`` will attempt to guess them - so the above example could instead look like this:
.. code-block:: bash
sqlite-utils add-foreign-key books.db books author_id
Add ``--ignore`` to ignore an existing foreign key (as opposed to returning an error):
.. code-block:: bash
sqlite-utils add-foreign-key books.db books author_id --ignore
See :ref:`python_api_add_foreign_key` in the Python API documentation for further details, including how the automatic table guessing mechanism works.
.. _cli_add_foreign_keys:
Adding multiple foreign keys at once
------------------------------------
Adding a foreign key requires a ``VACUUM``. On large databases this can be an expensive operation, so if you are adding multiple foreign keys you can combine them into one operation (and hence one ``VACUUM``) using ``add-foreign-keys``:
.. code-block:: bash
sqlite-utils add-foreign-keys books.db \
books author_id authors id \
authors country_id countries id
When you are using this command each foreign key needs to be defined in full, as four arguments - the table, column, other table and other column.
.. _cli_index_foreign_keys:
Adding indexes for all foreign keys
-----------------------------------
If you want to ensure that every foreign key column in your database has a corresponding index, you can do so like this:
.. code-block:: bash
sqlite-utils index-foreign-keys books.db
.. _cli_defaults_not_null:
Setting defaults and not null constraints
=========================================
You can use the ``--not-null`` and ``--default`` options (to both ``insert`` and ``upsert``) to specify columns that should be ``NOT NULL`` or to set database defaults for one or more specific columns:
.. code-block:: bash
sqlite-utils insert dogs.db dogs_with_scores dogs-with-scores.json \
--not-null=age \
--not-null=name \
--default age 2 \
--default score 5
.. _cli_create_index:
Creating indexes
================
You can add an index to an existing table using the ``create-index`` command:
.. code-block:: bash
sqlite-utils create-index mydb.db mytable col1 [col2...]
This can be used to create indexes against a single column or multiple columns.
The name of the index will be automatically derived from the table and columns. To specify a different name, use ``--name=name_of_index``.
Use the ``--unique`` option to create a unique index.
Use ``--if-not-exists`` to avoid attempting to create the index if one with that name already exists.
To add an index on a column in descending order, prefix the column with a hyphen. Since this can be confused for a command-line option you need to construct that like this:
.. code-block:: bash
sqlite-utils create-index mydb.db mytable -- col1 -col2 col3
This will create an index on that table on ``(col1, col2 desc, col3)``.
If your column names are already prefixed with a hyphen you'll need to manually execute a ``CREATE INDEX`` SQL statement to add indexes to them rather than using this tool.
Add the ``--analyze`` option to run ``ANALYZE`` against the index after it has been created.
.. _cli_fts:
Configuring full-text search
============================
You can enable SQLite full-text search on a table and a set of columns like this:
.. code-block:: bash
sqlite-utils enable-fts mydb.db documents title summary
This will use SQLite's FTS5 module by default. Use ``--fts4`` if you want to use FTS4:
.. code-block:: bash
sqlite-utils enable-fts mydb.db documents title summary --fts4
The ``enable-fts`` command will populate the new index with all existing documents. If you later add more documents you will need to use ``populate-fts`` to cause them to be indexed as well:
.. code-block:: bash
sqlite-utils populate-fts mydb.db documents title summary
A better solution here is to use database triggers. You can set up database triggers to automatically update the full-text index using the ``--create-triggers`` option when you first run ``enable-fts``:
.. code-block:: bash
sqlite-utils enable-fts mydb.db documents title summary --create-triggers
To set a custom FTS tokenizer, e.g. to enable Porter stemming, use ``--tokenize=``:
.. code-block:: bash
sqlite-utils enable-fts mydb.db documents title summary --tokenize=porter
To remove the FTS tables and triggers you created, use ``disable-fts``:
.. code-block:: bash
sqlite-utils disable-fts mydb.db documents
To rebuild one or more FTS tables (see :ref:`python_api_fts_rebuild`), use ``rebuild-fts``:
.. code-block:: bash
sqlite-utils rebuild-fts mydb.db documents
You can rebuild every FTS table by running ``rebuild-fts`` without passing any table names:
.. code-block:: bash
sqlite-utils rebuild-fts mydb.db
.. _cli_search:
Executing searches
==================
Once you have configured full-text search for a table, you can search it using ``sqlite-utils search``:
.. code-block:: bash
sqlite-utils search mydb.db documents searchterm
This command accepts the same output options as ``sqlite-utils query``: ``--table``, ``--csv``, ``--tsv``, ``--nl`` etc.
By default it shows the most relevant matches first. You can specify a different sort order using the ``-o`` option, which can take a column or a column followed by ``desc``:
.. code-block:: bash
# Sort by rowid
sqlite-utils search mydb.db documents searchterm -o rowid
# Sort by created in descending order
sqlite-utils search mydb.db documents searchterm -o 'created desc'
SQLite `advanced search syntax <https://www.sqlite.org/fts5.html#full_text_query_syntax>`__ is enabled by default. To run a search with automatic quoting applied to the terms to avoid them being potentially interpreted as advanced search syntax use the ``--quote`` option.
You can specify a subset of columns to be returned using the ``-c`` option one or more times:
.. code-block:: bash
sqlite-utils search mydb.db documents searchterm -c title -c created
By default all search results will be returned. You can use ``--limit 20`` to return just the first 20 results.
Use the ``--sql`` option to output the SQL that would be executed, rather than running the query:
.. code-block:: bash
sqlite-utils search mydb.db documents searchterm --sql
.. code-block:: output
with original as (
select
rowid,
*
from [documents]
)
select
[original].*
from
[original]
join [documents_fts] on [original].rowid = [documents_fts].rowid
where
[documents_fts] match :query
order by
[documents_fts].rank
.. _cli_enable_counts:
Enabling cached counts
======================
``select count(*)`` queries can take a long time against large tables. ``sqlite-utils`` can speed these up by adding triggers to maintain a ``_counts`` table, see :ref:`python_api_cached_table_counts` for details.
The ``sqlite-utils enable-counts`` command can be used to configure these triggers, either for every table in the database or for specific tables.
.. code-block:: bash
# Configure triggers for every table in the database
sqlite-utils enable-counts mydb.db
# Configure triggers just for specific tables
sqlite-utils enable-counts mydb.db table1 table2
If the ``_counts`` table ever becomes out-of-sync with the actual table counts you can repair it using the ``reset-counts`` command:
.. code-block:: bash
sqlite-utils reset-counts mydb.db
.. _cli_analyze:
Optimizing index usage with ANALYZE
===================================
The `SQLite ANALYZE command <https://www.sqlite.org/lang_analyze.html>`__ builds a table of statistics which the query planner can use to make better decisions about which indexes to use for a given query.
You should run ``ANALYZE`` if your database is large and you do not think your indexes are being efficiently used.
To run ``ANALYZE`` against every index in a database, use this:
.. code-block:: bash
sqlite-utils analyze mydb.db
You can run it against specific tables, or against specific named indexes, by passing them as optional arguments:
.. code-block:: bash
sqlite-utils analyze mydb.db mytable idx_mytable_name
You can also run ``ANALYZE`` as part of another command using the ``--analyze`` option. This is supported by the ``create-index``, ``insert`` and ``upsert`` commands.
.. _cli_vacuum:
Vacuum
======
You can run VACUUM to optimize your database like so:
.. code-block:: bash
sqlite-utils vacuum mydb.db
.. _cli_optimize:
Optimize
========
The optimize command can dramatically reduce the size of your database if you are using SQLite full-text search. It runs OPTIMIZE against all of your FTS4 and FTS5 tables, then runs VACUUM.
If you just want to run OPTIMIZE without the VACUUM, use the ``--no-vacuum`` flag.
.. code-block:: bash
# Optimize all FTS tables and then VACUUM
sqlite-utils optimize mydb.db
# Optimize but skip the VACUUM
sqlite-utils optimize --no-vacuum mydb.db
To optimize specific tables rather than every FTS table, pass those tables as extra arguments:
.. code-block:: bash
sqlite-utils optimize mydb.db table_1 table_2
.. _cli_wal:
WAL mode
========
You can enable `Write-Ahead Logging <https://www.sqlite.org/wal.html>`__ for a database file using the ``enable-wal`` command:
.. code-block:: bash
sqlite-utils enable-wal mydb.db
You can disable WAL mode using ``disable-wal``:
.. code-block:: bash
sqlite-utils disable-wal mydb.db
Both of these commands accept one or more database files as arguments.
.. _cli_dump:
Dumping the database to SQL
===========================
The ``dump`` command outputs a SQL dump of the schema and full contents of the specified database file:
.. code-block:: bash
sqlite-utils dump mydb.db
BEGIN TRANSACTION;
CREATE TABLE ...
...
COMMIT;
.. _cli_load_extension:
Loading SQLite extensions
=========================
Many of these commands have the ability to load additional SQLite extensions using the ``--load-extension=/path/to/extension`` option - use ``--help`` to check for support, e.g. ``sqlite-utils rows --help``.
This option can be applied multiple times to load multiple extensions.
Since `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`__ is commonly used with SQLite, the value ``spatialite`` is special: it will search for SpatiaLite in the most common installation locations, saving you from needing to remember exactly where that module is located:
.. code-block:: bash
sqlite-utils memory "select spatialite_version()" --load-extension=spatialite
.. code-block:: output
[{"spatialite_version()": "4.3.0a"}]
.. _cli_spatialite:
SpatiaLite helpers
==================
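`SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`_ adds geographic capability to SQLite (similar to how PostGIS builds on PostgreSQL). The `SpatiaLite cookbook <http://www.gaia-gis.it/gaia-sins/spatialite-cookbook-5/index.html>`__ is a good resource for learning what's possible with it.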
You can convert an existing table to a geographic table by adding a geometry column, use the ``sqlite-utils add-geometry-column`` command:
.. code-block:: bash
sqlite-utils add-geometry-column spatial.db locations geometry --type POLYGON --srid 4326
The table (``locations`` in the example above) must already exist before adding a geometry column. Use ``sqlite-utils create-table`` first, then ``add-geometry-column``.
Use the ``--type`` option to specify a geometry type. By default, ``add-geometry-column`` uses a generic ``GEOMETRY``, which will work with any type, though it may not be supported by some desktop GIS applications.
Eight (case-insensitive) types are allowed:
* POINT
* LINESTRING
* POLYGON
* MULTIPOINT
* MULTILINESTRING
* MULTIPOLYGON
* GEOMETRYCOLLECTION
* GEOMETRY
.. _cli_spatialite_indexes:
Adding spatial indexes
----------------------
Once you have a geometry column, you can speed up bounding box queries by adding a spatial index:
.. code-block:: bash
sqlite-utils create-spatial-index spatial.db locations geometry
See this `SpatiaLite Cookbook recipe `__ for examples of how to use a spatial index.
.. _cli_install:
Installing packages
===================
The :ref:`convert command <cli_convert>` and the :ref:`insert -\\-convert <cli_insert_convert>` and :ref:`query -\\-functions <cli_query_functions>` options can be provided with a Python script that imports additional modules from the ``sqlite-utils`` environment.
You can install packages from PyPI directly into the correct environment using ``sqlite-utils install <package>``. This is a wrapper around ``pip install``.
.. code-block:: bash
sqlite-utils install beautifulsoup4
Use ``-U`` to upgrade an existing package.
.. _cli_uninstall:
Uninstalling packages
=====================
You can uninstall packages that were installed using ``sqlite-utils install`` with ``sqlite-utils uninstall <package>``:
.. code-block:: bash
sqlite-utils uninstall beautifulsoup4
Use ``-y`` to skip the request for confirmation.
sqlite-utils-4.0a0/docs/codespell-ignore-words.txt 0000664 0000000 0000000 00000000005 15007276064 0022317 0 ustar 00root root 0000000 0000000 doub
sqlite-utils-4.0a0/docs/conf.py 0000664 0000000 0000000 00000013062 15007276064 0016475 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
from subprocess import Popen, PIPE
from beanbag_docutils.sphinx.ext.github import github_linkcode_resolve
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    # Provides the :issue: role configured through ``extlinks`` below.
    "sphinx.ext.extlinks",
    # Generates API documentation from docstrings.
    "sphinx.ext.autodoc",
    # Third-party extension (sphinx-copybutton package).
    "sphinx_copybutton",
    # Links documented objects to their source; calls linkcode_resolve(),
    # which is defined in this file.
    "sphinx.ext.linkcode",
]
# Document members in the order they appear in the source file.
autodoc_member_order = "bysource"
# Render type hints in the description text instead of in the signature.
autodoc_typehints = "description"
# Each entry maps a role name to (URL template, caption template); "%s" is
# replaced by the role's argument, e.g. :issue:`123`.
extlinks = {
    "issue": ("https://github.com/simonw/sqlite-utils/issues/%s", "#%s"),
}
def linkcode_resolve(domain, info):
    """Hook called by ``sphinx.ext.linkcode`` to locate an object's source.

    The lookup is delegated to ``github_linkcode_resolve``, limited to the
    ``sqlite_utils`` module and pointed at the ``main`` branch of the
    ``simonw/sqlite-utils`` GitHub repository.

    :param domain: the domain passed in by Sphinx.
    :param info: the object details passed in by Sphinx.
    :return: whatever ``github_linkcode_resolve`` returns for these arguments.
    """
    # Where the source code lives on GitHub.
    repository = {
        "github_org_id": "simonw",
        "github_repo_id": "sqlite-utils",
        "branch": "main",
    }
    return github_linkcode_resolve(
        domain=domain,
        info=info,
        allowed_module_names=["sqlite_utils"],
        **repository,
    )
# Directories (relative to this file) that are searched for templates.
templates_path = ["_templates"]

# Source files use a single suffix. A list such as ['.rst', '.md'] could be
# given here instead to accept several.
source_suffix = ".rst"

# Name of the document holding the root toctree.
master_doc = "index"

# Project metadata.
project = "sqlite-utils"
author = "Simon Willison"
copyright = "2018-2022, Simon Willison"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# Derive version/release from ``git describe``. The command runs through the
# shell, so if git is unavailable or this is not a checkout, stdout is simply
# empty and both values fall back to "".
with Popen("git describe --tags --always", stdout=PIPE, shell=True) as pipe:
    # communicate() reads stdout to EOF and waits for the child, so the
    # process is reaped and the pipe closed. strip() removes the trailing
    # newline git prints, which would otherwise end up inside ``release``
    # (and inside ``version`` when the output contains no "-").
    git_version = pipe.communicate()[0].decode("utf8").strip()

if git_version:
    # Drop everything after the last "-" (e.g. the "-g<sha>" suffix) for the
    # short version; the release keeps the full describe string.
    version = git_version.rsplit("-", 1)[0]
    release = git_version
else:
    version = ""
    release = ""
# Language of the text Sphinx generates itself. It is also used for gettext
# based content translation, where it is normally set from the command line.
language = "en"

# Patterns, relative to the source directory, for files and directories to
# skip when collecting sources. These patterns also affect html_static_path
# and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# Pygments style used for syntax highlighting.
pygments_style = "sphinx"

# Only blocks with an explicit language (code-block) are highlighted.
highlight_language = "none"

# When True, `todo` and `todoList` directives produce output.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# Theme and title for the HTML (and HTML Help) pages.
html_theme = "furo"
html_title = "sqlite-utils"

# Theme-specific customisation would go here; see the theme documentation
# for the available options.
#
# html_theme_options = {}

# Custom static files (such as style sheets), relative to this directory.
# They are copied after the builtin static files, so a file named
# "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_js_files = ["js/custom.js"]

# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for the HTML help builder.
htmlhelp_basename = "sqlite-utils-doc"
# -- Options for LaTeX output ---------------------------------------------

# No LaTeX overrides are set. The commented keys show what can be customised:
#   'papersize':    'letterpaper' or 'a4paper'
#   'pointsize':    '10pt', '11pt' or '12pt'
#   'preamble':     additional content for the LaTeX preamble
#   'figure_align': figure (float) alignment, e.g. 'htbp'
latex_elements = {}

# One tuple per LaTeX file:
# (source start file, target name, title, author, documentclass).
latex_documents = [
    (master_doc, "sqlite-utils.tex", "sqlite-utils documentation", "Simon Willison", "manual"),
]

# -- Options for manual page output ---------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [
    (
        master_doc,
        "sqlite-utils",
        "sqlite-utils documentation",
        [author],
        1,
    ),
]

# -- Options for Texinfo output -------------------------------------------

# One tuple per Texinfo file:
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (
        master_doc,
        "sqlite-utils",
        "sqlite-utils documentation",
        author,
        "sqlite-utils",
        "Python library for manipulating SQLite databases",
        "Miscellaneous",
    ),
]
sqlite-utils-4.0a0/docs/contributing.rst 0000664 0000000 0000000 00000012477 15007276064 0020450 0 ustar 00root root 0000000 0000000 .. _contributing:
==============
Contributing
==============
Development of ``sqlite-utils`` takes place in the `sqlite-utils GitHub repository <https://github.com/simonw/sqlite-utils>`__.
All improvements to the software should start with an issue. Read `How I build a feature `__ for a detailed description of the recommended process for building bug fixes or enhancements.
.. _contributing_checkout:
Obtaining the code
==================
To work on this library locally, first check out the code. Then create a new virtual environment::
git clone git@github.com:simonw/sqlite-utils
cd sqlite-utils
python3 -mvenv venv
source venv/bin/activate
Or if you are using ``pipenv``::
pipenv shell
Within the virtual environment running ``sqlite-utils`` should run your locally editable version of the tool. You can use ``which sqlite-utils`` to confirm that you are running the version that lives in your virtual environment.
.. _contributing_tests:
Running the tests
=================
To install the dependencies and test dependencies::
pip install -e '.[test]'
To run the tests::
pytest
.. _contributing_docs:
Building the documentation
==========================
To build the documentation, first install the documentation dependencies::
pip install -e '.[docs]'
Then run ``make livehtml`` from the ``docs/`` directory to start a server on port 8000 that will serve the documentation and live-reload any time you make an edit to a ``.rst`` file::
cd docs
make livehtml
The `cog `__ tool is used to maintain portions of the documentation. You can run it like so::
cog -r docs/*.rst
.. _contributing_linting:
Linting and formatting
======================
``sqlite-utils`` uses `Black `__ for code formatting, and `flake8 `__ and `mypy `__ for linting and type checking.
Black is installed as part of ``pip install -e '.[test]'`` - you can then format your code by running it in the root of the project::
black .
To install ``mypy`` and ``flake8`` run the following::
pip install -e '.[flake8,mypy]'
Both commands can then be run in the root of the project like this::
flake8
mypy sqlite_utils
All three of these tools are run by our CI mechanism against every commit and pull request.
.. _contributing_just:
Using Just and pipenv
=====================
If you install `Just `__ and `pipenv `__ you can use them to manage your local development environment.
To create a virtual environment and install all development dependencies, run::
cd sqlite-utils
just init
To run all of the tests and linters::
just
To run tests, or run a specific test module or test by name::
just test # All tests
just test tests/test_cli_memory.py # Just this module
just test -k test_memory_no_detect_types # Just this test
To run just the linters::
just lint
To apply Black to your code::
just black
To update documentation using Cog::
just cog
To run the live documentation server (this will run Cog first)::
just docs
And to list all available commands::
just -l
.. _release_process:
Release process
===============
Releases are performed using tags. When a new release is published on GitHub, a `GitHub Actions workflow `__ will perform the following:
* Run the unit tests against all supported Python versions. If the tests pass...
* Build a wheel bundle of the underlying Python source code
* Push that new wheel up to PyPI: https://pypi.org/project/sqlite-utils/
To deploy new releases you will need to have push access to the GitHub repository.
``sqlite-utils`` follows `Semantic Versioning <https://semver.org/>`__::
major.minor.patch
We increment ``major`` for backwards-incompatible releases.
We increment ``minor`` for new features.
We increment ``patch`` for bugfix releases.
To release a new version, first create a commit that updates the version number in ``setup.py`` and :ref:`the changelog <changelog>` with highlights of the new version. An example `commit can be seen here `__::
# Update changelog
git commit -m " Release 3.29
Refs #423, #458, #467, #469, #470, #471, #472, #475" -a
git push
Referencing the issues that are part of the release in the commit message ensures the name of the release shows up on those issue pages, e.g. `here `__.
You can generate the list of issue references for a specific release by copying and pasting text from the release notes or GitHub changes-since-last-release view into this `Extract issue numbers from pasted text `__ tool.
To create the tag for the release, create `a new release `__ on GitHub matching the new version number. You can convert the release notes to Markdown by copying and pasting the rendered HTML into this `Paste to Markdown tool `__.
sqlite-utils-4.0a0/docs/index.rst 0000664 0000000 0000000 00000003265 15007276064 0017043 0 ustar 00root root 0000000 0000000 =======================
sqlite-utils |version|
=======================
|PyPI| |Changelog| |CI| |License| |discord|
.. |PyPI| image:: https://img.shields.io/pypi/v/sqlite-utils.svg
:target: https://pypi.org/project/sqlite-utils/
.. |Changelog| image:: https://img.shields.io/github/v/release/simonw/sqlite-utils?include_prereleases&label=changelog
:target: https://sqlite-utils.datasette.io/en/stable/changelog.html
.. |CI| image:: https://github.com/simonw/sqlite-utils/workflows/Test/badge.svg
:target: https://github.com/simonw/sqlite-utils/actions
.. |License| image:: https://img.shields.io/badge/license-Apache%202.0-blue.svg
:target: https://github.com/simonw/sqlite-utils/blob/main/LICENSE
.. |discord| image:: https://img.shields.io/discord/823971286308356157?label=discord
:target: https://discord.gg/Ass7bCAMDw
*CLI tool and Python library for manipulating SQLite databases*
This library and command-line utility helps create SQLite databases from an existing collection of data.
Most of the functionality is available as either a Python API or through the ``sqlite-utils`` command-line tool.
sqlite-utils is not intended to be a full ORM: the focus is utility helpers to make creating the initial database and populating it with data as productive as possible.
It is designed as a useful complement to `Datasette <https://datasette.io/>`_.
`Cleaning data with sqlite-utils and Datasette `_ provides a tutorial introduction (and accompanying ten minute video) about using this tool.
Contents
--------
.. toctree::
:maxdepth: 3
installation
cli
python-api
plugins
reference
cli-reference
contributing
changelog
sqlite-utils-4.0a0/docs/installation.rst 0000664 0000000 0000000 00000006000 15007276064 0020423 0 ustar 00root root 0000000 0000000 .. _installation:
==============
Installation
==============
``sqlite-utils`` is tested on Linux, macOS and Windows.
.. _installation_homebrew:
Using Homebrew
==============
The :ref:`sqlite-utils command-line tool <cli>` can be installed on macOS using Homebrew::
brew install sqlite-utils
If you have it installed and want to upgrade to the most recent release, you can run::
brew upgrade sqlite-utils
Then run ``sqlite-utils --version`` to confirm the installed version.
.. _installation_pip:
Using pip
=========
The `sqlite-utils package <https://pypi.org/project/sqlite-utils/>`__ on PyPI includes both the :ref:`sqlite_utils Python library <python_api>` and the ``sqlite-utils`` command-line tool. You can install them using ``pip`` like so::
pip install sqlite-utils
.. _installation_pipx:
Using pipx
==========
`pipx `__ is a tool for installing Python command-line applications in their own isolated environments. You can use ``pipx`` to install the ``sqlite-utils`` command-line tool like this::
pipx install sqlite-utils
.. _installation_sqlite3_alternatives:
Alternatives to sqlite3
=======================
By default, ``sqlite-utils`` uses the ``sqlite3`` package bundled with the Python standard library.
Depending on your operating system, this may come with some limitations.
On some platforms the ability to load additional extensions (via ``conn.load_extension(...)`` or ``--load-extension=/path/to/extension``) may be disabled.
You may also see the error ``sqlite3.OperationalError: table sqlite_master may not be modified`` when trying to alter an existing table.
You can work around these limitations by installing either the `pysqlite3 `__ package or the `sqlean.py `__ package, both of which provide drop-in replacements for the standard library ``sqlite3`` module but with a recent version of SQLite and full support for loading extensions.
To install ``sqlean.py`` (which has compiled binary wheels available for all major platforms) run the following:
.. code-block:: bash
sqlite-utils install sqlean.py
``pysqlite3`` and ``sqlean.py`` do not provide implementations of the ``.iterdump()`` method. To use that method (see :ref:`python_api_itedump`) or the ``sqlite-utils dump`` command you should also install the ``sqlite-dump`` package:
.. code-block:: bash
sqlite-utils install sqlite-dump
.. _installation_completion:
Setting up shell completion
===========================
You can configure shell tab completion for the ``sqlite-utils`` command using these commands.
For ``bash``:
.. code-block:: bash
eval "$(_SQLITE_UTILS_COMPLETE=bash_source sqlite-utils)"
For ``zsh``:
.. code-block:: zsh
eval "$(_SQLITE_UTILS_COMPLETE=zsh_source sqlite-utils)"
Add this code to ``~/.zshrc`` or ``~/.bashrc`` to automatically run it when you start a new shell.
See `the Click documentation `__ for more details.sqlite-utils-4.0a0/docs/plugins.rst 0000664 0000000 0000000 00000011000 15007276064 0017377 0 ustar 00root root 0000000 0000000 .. _plugins:
=========
Plugins
=========
``sqlite-utils`` supports plugins, which can be used to add extra features to the software.
Plugins can add new commands, for example ``sqlite-utils some-command ...``
Plugins can be installed using the ``sqlite-utils install`` command:
.. code-block:: bash
sqlite-utils install sqlite-utils-name-of-plugin
You can see a JSON list of plugins that have been installed by running this:
.. code-block:: bash
sqlite-utils plugins
Plugin hooks such as :ref:`plugins_hooks_prepare_connection` affect each instance of the ``Database`` class. You can opt-out of these plugins by creating that class instance like so:
.. code-block:: python
db = Database(memory=True, execute_plugins=False)
.. _plugins_building:
Building a plugin
-----------------
Plugins are created in a directory named after the plugin. To create a "hello world" plugin, first create a ``hello-world`` directory:
.. code-block:: bash
mkdir hello-world
cd hello-world
In that folder create two files. The first is a ``pyproject.toml`` file describing the plugin:
.. code-block:: toml
[project]
name = "sqlite-utils-hello-world"
version = "0.1"
[project.entry-points.sqlite_utils]
hello_world = "sqlite_utils_hello_world"
The ``[project.entry-points.sqlite_utils]`` section tells ``sqlite-utils`` which module to load when executing the plugin.
Then create ``sqlite_utils_hello_world.py`` with the following content:
.. code-block:: python
import click
import sqlite_utils
@sqlite_utils.hookimpl
def register_commands(cli):
@cli.command()
def hello_world():
"Say hello world"
click.echo("Hello world!")
Install the plugin in "editable" mode - so you can make changes to the code and have them picked up instantly by ``sqlite-utils`` - like this:
.. code-block:: bash
sqlite-utils install -e .
Or pass the path to your plugin directory:
.. code-block:: bash
sqlite-utils install -e /dev/sqlite-utils-hello-world
Now, running this should execute your new command:
.. code-block:: bash
sqlite-utils hello-world
Your command will also be listed in the output of ``sqlite-utils --help``.
See the `LLM plugin documentation `__ for tips on distributing your plugin.
.. _plugins_hooks:
Plugin hooks
------------
Plugin hooks allow ``sqlite-utils`` to be customized.
.. _plugins_hooks_register_commands:
register_commands(cli)
~~~~~~~~~~~~~~~~~~~~~~
This hook can be used to register additional commands with the ``sqlite-utils`` CLI. It is called with the ``cli`` object, which is a ``click.Group`` instance.
Example implementation:
.. code-block:: python
import click
import sqlite_utils
@sqlite_utils.hookimpl
def register_commands(cli):
@cli.command()
def hello_world():
"Say hello world"
click.echo("Hello world!")
New commands implemented by plugins can invoke existing commands using the `context.invoke `__ mechanism.
As a special niche feature, if your plugin needs to import some files and then act against an in-memory database containing those files you can forward to the :ref:`sqlite-utils memory command ` and pass it ``return_db=True``:
.. code-block:: python
@cli.command()
@click.pass_context
@click.argument(
"paths",
type=click.Path(file_okay=True, dir_okay=False, allow_dash=True),
required=False,
nargs=-1,
)
def show_schema_for_files(ctx, paths):
from sqlite_utils.cli import memory
db = ctx.invoke(memory, paths=paths, return_db=True)
# Now do something with that database
click.echo(db.schema)
.. _plugins_hooks_prepare_connection:
prepare_connection(conn)
~~~~~~~~~~~~~~~~~~~~~~~~
This hook is called when a new SQLite database connection is created. You can
use it to `register custom SQL functions <https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.create_function>`_,
aggregates and collations. For example:
.. code-block:: python
import sqlite_utils
@sqlite_utils.hookimpl
def prepare_connection(conn):
conn.create_function(
"hello", 1, lambda name: f"Hello, {name}!"
)
This registers a SQL function called ``hello`` which takes a single
argument and can be called like this:
.. code-block:: sql
select hello("world"); -- "Hello, world!"
sqlite-utils-4.0a0/docs/python-api.rst 0000664 0000000 0000000 00000307067 15007276064 0020033 0 ustar 00root root 0000000 0000000 .. _python_api:
=============================
sqlite_utils Python library
=============================
.. contents:: :local:
:class: this-will-duplicate-information-and-it-is-still-useful-here
.. _python_api_getting_started:
Getting started
===============
Here's how to create a new SQLite database file containing a new ``chickens`` table, populated with four records:
.. code-block:: python
from sqlite_utils import Database
db = Database("chickens.db")
db["chickens"].insert_all([{
"name": "Azi",
"color": "blue",
}, {
"name": "Lila",
"color": "blue",
}, {
"name": "Suna",
"color": "gold",
}, {
"name": "Cardi",
"color": "black",
}])
You can loop through those rows like this:
.. code-block:: python
for row in db["chickens"].rows:
print(row)
Which outputs the following::
{'name': 'Azi', 'color': 'blue'}
{'name': 'Lila', 'color': 'blue'}
{'name': 'Suna', 'color': 'gold'}
{'name': 'Cardi', 'color': 'black'}
To run a SQL query, use :ref:`db.query() <python_api_query>`:
.. code-block:: python
for row in db.query("""
select color, count(*)
from chickens group by color
order by count(*) desc
"""):
print(row)
Which outputs::
{'color': 'blue', 'count(*)': 2}
{'color': 'gold', 'count(*)': 1}
{'color': 'black', 'count(*)': 1}
.. _python_api_connect:
Connecting to or creating a database
====================================
Database objects are constructed by passing in either a path to a file on disk or an existing SQLite3 database connection:
.. code-block:: python
from sqlite_utils import Database
db = Database("my_database.db")
This will create ``my_database.db`` if it does not already exist.
If you want to recreate a database from scratch (first removing the existing file from disk if it already exists) you can use the ``recreate=True`` argument:
.. code-block:: python
db = Database("my_database.db", recreate=True)
Instead of a file path you can pass in an existing SQLite connection:
.. code-block:: python
import sqlite3
db = Database(sqlite3.connect("my_database.db"))
If you want to create an in-memory database, you can do so like this:
.. code-block:: python
db = Database(memory=True)
You can also create a named in-memory database. Unlike regular memory databases these can be accessed by multiple threads, provided at least one reference to the database still exists. ``del db`` will clear the database from memory.
.. code-block:: python
db = Database(memory_name="my_shared_database")
Connections use ``PRAGMA recursive_triggers=on`` by default. If you don't want to use `recursive triggers <https://www.sqlite.org/pragma.html#pragma_recursive_triggers>`__ you can turn them off using:
.. code-block:: python
db = Database(memory=True, recursive_triggers=False)
By default, any :ref:`sqlite-utils plugins <plugins>` that implement the :ref:`plugins_hooks_prepare_connection` hook will be executed against the connection when you create the ``Database`` object. You can opt out of executing plugins using ``execute_plugins=False`` like this:
.. code-block:: python
db = Database(memory=True, execute_plugins=False)
You can pass ``strict=True`` to enable `SQLite STRICT mode <https://www.sqlite.org/stricttables.html>`__ for all tables created using this database object:
.. code-block:: python
db = Database("my_database.db", strict=True)
.. _python_api_attach:
Attaching additional databases
------------------------------
SQLite supports cross-database SQL queries, which can join data from tables in more than one database file.
You can attach an additional database using the ``.attach()`` method, providing an alias to use for that database and the path to the SQLite file on disk.
.. code-block:: python
db = Database("first.db")
db.attach("second", "second.db")
# Now you can run queries like this one:
print(db.query("""
select * from table_in_first
union all
select * from second.table_in_second
"""))
You can reference tables in the attached database using the alias value you passed to ``db.attach(alias, filepath)`` as a prefix, for example the ``second.table_in_second`` reference in the SQL query above.
.. _python_api_tracing:
Tracing queries
---------------
You can use the ``tracer`` mechanism to see SQL queries that are being executed by SQLite. A tracer is a function that you provide which will be called with ``sql`` and ``params`` arguments every time SQL is executed, for example:
.. code-block:: python
def tracer(sql, params):
print("SQL: {} - params: {}".format(sql, params))
You can pass this function to the ``Database()`` constructor like so:
.. code-block:: python
db = Database(memory=True, tracer=tracer)
You can also turn on a tracer function temporarily for a block of code using the ``with db.tracer(...)`` context manager:
.. code-block:: python
db = Database(memory=True)
# ... later
with db.tracer(print):
db["dogs"].insert({"name": "Cleo"})
This example will print queries only for the duration of the ``with`` block.
.. _python_api_executing_queries:
Executing queries
=================
The ``Database`` class offers several methods for directly executing SQL queries.
.. _python_api_query:
db.query(sql, params)
---------------------
The ``db.query(sql)`` function executes a SQL query and returns an iterator over Python dictionaries representing the resulting rows:
.. code-block:: python
db = Database(memory=True)
db["dogs"].insert_all([{"name": "Cleo"}, {"name": "Pancakes"}])
for row in db.query("select * from dogs"):
print(row)
# Outputs:
# {'name': 'Cleo'}
# {'name': 'Pancakes'}
.. _python_api_execute:
db.execute(sql, params)
-----------------------
The ``db.execute()`` and ``db.executescript()`` methods provide wrappers around ``.execute()`` and ``.executescript()`` on the underlying SQLite connection. These wrappers log to the :ref:`tracer function <python_api_tracing>` if one has been registered.
``db.execute(sql)`` returns a `sqlite3.Cursor <https://docs.python.org/3/library/sqlite3.html#cursor-objects>`__ that was used to execute the SQL.
.. code-block:: python
db = Database(memory=True)
db["dogs"].insert({"name": "Cleo"})
cursor = db.execute("update dogs set name = 'Cleopaws'")
print(cursor.rowcount)
# Outputs the number of rows affected by the update
# In this case 1, since only one row was inserted
Other cursor methods such as ``.fetchone()`` and ``.fetchall()`` are also available; see the `standard library documentation <https://docs.python.org/3/library/sqlite3.html#cursor-objects>`__.
.. _python_api_parameters:
Passing parameters
------------------
Both ``db.query()`` and ``db.execute()`` accept an optional second argument for parameters to be passed to the SQL query.
This can take the form of either a tuple/list or a dictionary, depending on the type of parameters used in the query. Values passed in this way will be correctly quoted and escaped, helping avoid SQL injection vulnerabilities.
``?`` parameters in the SQL query can be filled in using a list:
.. code-block:: python
db.execute("update dogs set name = ?", ["Cleopaws"])
# This will rename ALL dogs to be called "Cleopaws"
Named parameters using ``:name`` can be filled using a dictionary:
.. code-block:: python
dog = next(db.query(
"select rowid, name from dogs where name = :name",
{"name": "Cleopaws"}
))
# dog is now {'rowid': 1, 'name': 'Cleopaws'}
In this example ``next()`` is used to retrieve the first result in the iterator returned by the ``db.query()`` method.
.. _python_api_table:
Accessing tables
================
Tables are accessed using the indexing operator, like so:
.. code-block:: python
table = db["my_table"]
If the table does not yet exist, it will be created the first time you attempt to insert or upsert data into it.
You can also access tables using the ``.table()`` method like so:
.. code-block:: python
table = db.table("my_table")
Using this factory function allows you to set :ref:`python_api_table_configuration`.
.. _python_api_tables:
Listing tables
==============
You can list the names of tables in a database using the ``.table_names()`` method::
>>> db.table_names()
['dogs']
To see just the FTS4 tables, use ``.table_names(fts4=True)``. For FTS5, use ``.table_names(fts5=True)``.
You can also iterate through the table objects themselves using the ``.tables`` property::
>>> db.tables
[<Table dogs>]
.. _python_api_views:
Listing views
=============
``.view_names()`` shows you a list of views in the database::
>>> db.view_names()
['good_dogs']
You can iterate through view objects using the ``.views`` property::
>>> db.views
[<View good_dogs>]
View objects are similar to Table objects, except that any attempts to insert or update data will throw an error. The full list of methods and properties available on a view object is as follows:
* ``columns``
* ``columns_dict``
* ``count``
* ``schema``
* ``rows``
* ``rows_where(where, where_args, order_by, select)``
* ``drop()``
.. _python_api_rows:
Listing rows
============
To iterate through dictionaries for each of the rows in a table, use ``.rows``::
>>> db = sqlite_utils.Database("dogs.db")
>>> for row in db["dogs"].rows:
... print(row)
{'id': 1, 'age': 4, 'name': 'Cleo'}
{'id': 2, 'age': 2, 'name': 'Pancakes'}
You can filter rows by a WHERE clause using ``.rows_where(where, where_args)``::
>>> db = sqlite_utils.Database("dogs.db")
>>> for row in db["dogs"].rows_where("age > ?", [3]):
... print(row)
{'id': 1, 'age': 4, 'name': 'Cleo'}
The first argument is a fragment of SQL. The second, optional argument is values to be passed to that fragment - you can use ``?`` placeholders and pass an array, or you can use ``:named`` parameters and pass a dictionary, like this::
>>> for row in db["dogs"].rows_where("age > :age", {"age": 3}):
... print(row)
{'id': 1, 'age': 4, 'name': 'Cleo'}
To return custom columns (instead of the default that uses ``select *``) pass ``select="column1, column2"``::
>>> db = sqlite_utils.Database("dogs.db")
>>> for row in db["dogs"].rows_where(select='name, age'):
... print(row)
{'name': 'Cleo', 'age': 4}
To specify an order, use the ``order_by=`` argument::
>>> for row in db["dogs"].rows_where("age > 1", order_by="age"):
... print(row)
{'id': 2, 'age': 2, 'name': 'Pancakes'}
{'id': 1, 'age': 4, 'name': 'Cleo'}
You can use ``order_by="age desc"`` for descending order.
You can order all records in the table by excluding the ``where`` argument::
>>> for row in db["dogs"].rows_where(order_by="age desc"):
... print(row)
{'id': 1, 'age': 4, 'name': 'Cleo'}
{'id': 2, 'age': 2, 'name': 'Pancakes'}
This method also accepts ``offset=`` and ``limit=`` arguments, for specifying an OFFSET and a LIMIT for the SQL query::
>>> for row in db["dogs"].rows_where(order_by="age desc", limit=1):
... print(row)
{'id': 1, 'age': 4, 'name': 'Cleo'}
.. _python_api_rows_count_where:
Counting rows
-------------
To count the number of rows that would be returned by a where filter, use ``.count_where(where, where_args)``::
>>> db["dogs"].count_where("age > ?", [1])
2
.. _python_api_pks_and_rows_where:
Listing rows with their primary keys
====================================
Sometimes it can be useful to retrieve the primary key along with each row, in order to pass that key (or primary key tuple) to the ``.get()`` or ``.update()`` methods.
The ``.pks_and_rows_where()`` method takes the same signature as ``.rows_where()`` (with the exception of the ``select=`` parameter) but returns a generator that yields pairs of ``(primary key, row dictionary)``.
The primary key value will usually be a single value but can also be a tuple if the table has a compound primary key.
If the table is a ``rowid`` table (with no explicit primary key column) then that ID will be returned.
::
>>> db = sqlite_utils.Database(memory=True)
>>> db["dogs"].insert({"name": "Cleo"})
>>> for pk, row in db["dogs"].pks_and_rows_where():
... print(pk, row)
1 {'rowid': 1, 'name': 'Cleo'}
>>> db["dogs_with_pk"].insert({"id": 5, "name": "Cleo"}, pk="id")
>>> for pk, row in db["dogs_with_pk"].pks_and_rows_where():
... print(pk, row)
5 {'id': 5, 'name': 'Cleo'}
>>> db["dogs_with_compound_pk"].insert(
... {"species": "dog", "id": 3, "name": "Cleo"},
... pk=("species", "id")
... )
>>> for pk, row in db["dogs_with_compound_pk"].pks_and_rows_where():
... print(pk, row)
('dog', 3) {'species': 'dog', 'id': 3, 'name': 'Cleo'}
.. _python_api_get:
Retrieving a specific record
============================
You can retrieve a record by its primary key using ``table.get()``::
>>> db = sqlite_utils.Database("dogs.db")
>>> print(db["dogs"].get(1))
{'id': 1, 'age': 4, 'name': 'Cleo'}
If the table has a compound primary key you can pass in the primary key values as a tuple::
>>> db["compound_dogs"].get(("mixed", 3))
If the record does not exist a ``NotFoundError`` will be raised:
.. code-block:: python
from sqlite_utils.db import NotFoundError
try:
row = db["dogs"].get(5)
except NotFoundError:
print("Dog not found")
.. _python_api_schema:
Showing the schema
==================
The ``db.schema`` property returns the full SQL schema for the database as a string::
>>> db = sqlite_utils.Database("dogs.db")
>>> print(db.schema)
CREATE TABLE "dogs" (
[id] INTEGER PRIMARY KEY,
[name] TEXT
);
.. _python_api_creating_tables:
Creating tables
===============
The easiest way to create a new table is to insert a record into it:
.. code-block:: python
from sqlite_utils import Database
import sqlite3
db = Database("dogs.db")
dogs = db["dogs"]
dogs.insert({
"name": "Cleo",
"twitter": "cleopaws",
"age": 3,
"is_good_dog": True,
})
This will automatically create a new table called "dogs" with the following schema::
CREATE TABLE dogs (
name TEXT,
twitter TEXT,
age INTEGER,
is_good_dog INTEGER
)
You can also specify a primary key by passing the ``pk=`` parameter to the ``.insert()`` call. This will only be obeyed if the record being inserted causes the table to be created:
.. code-block:: python
dogs.insert({
"id": 1,
"name": "Cleo",
"twitter": "cleopaws",
"age": 3,
"is_good_dog": True,
}, pk="id")
After inserting a row like this, the ``dogs.last_rowid`` property will return the SQLite ``rowid`` assigned to the most recently inserted record.
The ``dogs.last_pk`` property will return the last inserted primary key value, if you specified one. This can be very useful when writing code that creates foreign keys or many-to-many relationships.
.. _python_api_custom_columns:
Custom column order and column types
------------------------------------
The order of the columns in the table will be derived from the order of the keys in the dictionary, provided you are using Python 3.6 or later.
If you want to explicitly set the order of the columns you can do so using the ``column_order=`` parameter:
.. code-block:: python
db["dogs"].insert({
"id": 1,
"name": "Cleo",
"twitter": "cleopaws",
"age": 3,
"is_good_dog": True,
}, pk="id", column_order=("id", "twitter", "name"))
You don't need to pass all of the columns to the ``column_order`` parameter. If you only pass a subset of the columns the remaining columns will be ordered based on the key order of the dictionary.
Column types are detected based on the example data provided. Sometimes you may find you need to override these detected types - to create an integer column for data that was provided as a string for example, or to ensure that a table where the first example was ``None`` is created as an ``INTEGER`` rather than a ``TEXT`` column. You can do this using the ``columns=`` parameter:
.. code-block:: python
db["dogs"].insert({
"id": 1,
"name": "Cleo",
"age": "5",
}, pk="id", columns={"age": int, "weight": float})
This will create a table with the following schema:
.. code-block:: sql
CREATE TABLE [dogs] (
[id] INTEGER PRIMARY KEY,
[name] TEXT,
[age] INTEGER,
[weight] FLOAT
)
.. _python_api_explicit_create:
Explicitly creating a table
---------------------------
You can directly create a new table without inserting any data into it using the ``.create()`` method:
.. code-block:: python
db["cats"].create({
"id": int,
"name": str,
"weight": float,
}, pk="id")
The first argument here is a dictionary specifying the columns you would like to create. Each column is paired with a Python type indicating the type of column. See :ref:`python_api_add_column` for full details on how these types work.
This method takes optional arguments ``pk=``, ``column_order=``, ``foreign_keys=``, ``not_null=set()`` and ``defaults=dict()`` - explained below.
A ``sqlite_utils.utils.sqlite3.OperationalError`` will be raised if a table of that name already exists.
You can pass ``ignore=True`` to ignore that error. You can also use ``if_not_exists=True`` to use the SQL ``CREATE TABLE IF NOT EXISTS`` pattern to achieve the same effect:
.. code-block:: python
db["cats"].create({
"id": int,
"name": str,
}, pk="id", if_not_exists=True)
To drop and replace any existing table of that name, pass ``replace=True``. This is a **dangerous operation** that will result in loss of existing data in the table.
You can also pass ``transform=True`` to have any existing tables :ref:`transformed <python_api_transform>` to match your new table specification. This is a **dangerous operation** as it will drop columns that are no longer listed in your call to ``.create()``, so be careful when running this.
.. code-block:: python
db["cats"].create({
"id": int,
"name": str,
"weight": float,
}, pk="id", transform=True)
The ``transform=True`` option will update the table schema if any of the following have changed:
- The specified columns or their types
- The specified primary key
- The order of the columns, defined using ``column_order=``
- The ``not_null=`` or ``defaults=`` arguments
Changes to ``foreign_keys=`` are not currently detected and applied by ``transform=True``.
You can pass ``strict=True`` to create a table in ``STRICT`` mode:
.. code-block:: python
db["cats"].create({
"id": int,
"name": str,
}, strict=True)
.. _python_api_compound_primary_keys:
Compound primary keys
---------------------
If you want to create a table with a compound primary key that spans multiple columns, you can do so by passing a tuple of column names to any of the methods that accept a ``pk=`` parameter. For example:
.. code-block:: python
db["cats"].create({
"id": int,
"breed": str,
"name": str,
"weight": float,
}, pk=("breed", "id"))
This also works for the ``.insert()``, ``.insert_all()``, ``.upsert()`` and ``.upsert_all()`` methods.
.. _python_api_foreign_keys:
Specifying foreign keys
-----------------------
Any operation that can create a table (``.create()``, ``.insert()``, ``.insert_all()``, ``.upsert()`` and ``.upsert_all()``) accepts an optional ``foreign_keys=`` argument which can be used to set up foreign key constraints for the table that is being created.
If you are using your database with `Datasette <https://datasette.io/>`__, Datasette will detect these constraints and use them to generate hyperlinks to associated records.
The ``foreign_keys`` argument takes a list that indicates which foreign keys should be created. The list can take several forms. The simplest is a list of columns:
.. code-block:: python
foreign_keys=["author_id"]
The library will guess which tables you wish to reference based on the column names using the rules described in :ref:`python_api_add_foreign_key`.
You can also be more explicit, by passing in a list of tuples:
.. code-block:: python
foreign_keys=[
("author_id", "authors", "id")
]
This means that the ``author_id`` column should be a foreign key that references the ``id`` column in the ``authors`` table.
You can leave off the third item in the tuple to have the referenced column automatically set to the primary key of that table. A full example:
.. code-block:: python
db["authors"].insert_all([
{"id": 1, "name": "Sally"},
{"id": 2, "name": "Asheesh"}
], pk="id")
db["books"].insert_all([
{"title": "Hedgehogs of the world", "author_id": 1},
{"title": "How to train your wolf", "author_id": 2},
], foreign_keys=[
("author_id", "authors")
])
.. _python_api_table_configuration:
Table configuration options
---------------------------
The ``.insert()``, ``.upsert()``, ``.insert_all()`` and ``.upsert_all()`` methods each take a number of keyword arguments, some of which influence what happens should they cause a table to be created and some of which affect the behavior of those methods.
You can set default values for these methods by accessing the table through the ``db.table(...)`` method (instead of using ``db["table_name"]``), like so:
.. code-block:: python
table = db.table(
"authors",
pk="id",
not_null={"name", "score"},
column_order=("id", "name", "score", "url")
)
# Now you can call .insert() like so:
table.insert({"id": 1, "name": "Tracy", "score": 5})
The configuration options that can be specified in this way are ``pk``, ``foreign_keys``, ``column_order``, ``not_null``, ``defaults``, ``batch_size``, ``hash_id``, ``hash_id_columns``, ``alter``, ``ignore``, ``replace``, ``extracts``, ``conversions``, ``columns``, ``strict``. These are all documented below.
.. _python_api_defaults_not_null:
Setting defaults and not null constraints
-----------------------------------------
Each of the methods that can cause a table to be created takes optional arguments ``not_null=set()`` and ``defaults=dict()``. The methods that take these optional arguments are:
* ``db.create_table(...)``
* ``table.create(...)``
* ``table.insert(...)``
* ``table.insert_all(...)``
* ``table.upsert(...)``
* ``table.upsert_all(...)``
You can use ``not_null=`` to pass a set of column names that should have a ``NOT NULL`` constraint set on them when they are created.
You can use ``defaults=`` to pass a dictionary mapping columns to the default value that should be specified in the ``CREATE TABLE`` statement.
Here's an example that uses these features:
.. code-block:: python
db["authors"].insert_all(
[{"id": 1, "name": "Sally", "score": 2}],
pk="id",
not_null={"name", "score"},
defaults={"score": 1},
)
db["authors"].insert({"name": "Dharma"})
list(db["authors"].rows)
# Outputs:
# [{'id': 1, 'name': 'Sally', 'score': 2},
# {'id': 2, 'name': 'Dharma', 'score': 1}]
print(db["authors"].schema)
# Outputs:
# CREATE TABLE [authors] (
# [id] INTEGER PRIMARY KEY,
# [name] TEXT NOT NULL,
# [score] INTEGER NOT NULL DEFAULT 1
# )
.. _python_api_rename_table:
Renaming a table
================
The ``db.rename_table(old_name, new_name)`` method can be used to rename a table:
.. code-block:: python
db.rename_table("my_table", "new_name_for_my_table")
This executes the following SQL:
.. code-block:: sql
ALTER TABLE [my_table] RENAME TO [new_name_for_my_table]
.. _python_api_duplicate:
Duplicating tables
==================
The ``table.duplicate()`` method creates a copy of the table, copying both the table schema and all of the rows in that table:
.. code-block:: python
db["authors"].duplicate("authors_copy")
The new ``authors_copy`` table will now contain a duplicate copy of the data from ``authors``.
This method raises ``sqlite_utils.db.NoTable`` if the table does not exist.
.. _python_api_bulk_inserts:
Bulk inserts
============
If you have more than one record to insert, the ``insert_all()`` method is a much more efficient way of inserting them. Just like ``insert()`` it will automatically detect the columns that should be created, but it will inspect the first batch of 100 items to help decide what those column types should be.
Use it like this:
.. code-block:: python
db["dogs"].insert_all([{
"id": 1,
"name": "Cleo",
"twitter": "cleopaws",
"age": 3,
"is_good_dog": True,
}, {
"id": 2,
"name": "Marnie",
"twitter": "MarnieTheDog",
"age": 16,
"is_good_dog": True,
}], pk="id", column_order=("id", "twitter", "name"))
The column types used in the ``CREATE TABLE`` statement are automatically derived from the types of data in that first batch of rows. Any additional columns in subsequent batches will cause a ``sqlite3.OperationalError`` exception to be raised unless the ``alter=True`` argument is supplied, in which case the new columns will be created.
The function can accept an iterator or generator of rows and will commit them according to the batch size. The default batch size is 100, but you can specify a different size using the ``batch_size`` parameter:
.. code-block:: python
db["big_table"].insert_all(({
"id": 1,
"name": "Name {}".format(i),
} for i in range(10000)), batch_size=1000)
You can skip inserting any records that have a primary key that already exists using ``ignore=True``. This works with both ``.insert({...}, ignore=True)`` and ``.insert_all([...], ignore=True)``.
You can delete all the existing rows in the table before inserting the new records using ``truncate=True``. This is useful if you want to replace the data in the table.
Pass ``analyze=True`` to run ``ANALYZE`` against the table after inserting the new records.
.. _python_api_insert_replace:
Insert-replacing data
=====================
If you try to insert data using a primary key that already exists, the ``.insert()`` or ``.insert_all()`` method will raise a ``sqlite3.IntegrityError`` exception.
This example catches that exception:
.. code-block:: python
from sqlite_utils.utils import sqlite3
try:
db["dogs"].insert({"id": 1, "name": "Cleo"}, pk="id")
except sqlite3.IntegrityError:
print("Record already exists with that primary key")
Importing from ``sqlite_utils.utils.sqlite3`` ensures your code continues to work even if you are using the ``pysqlite3`` library instead of the Python standard library ``sqlite3`` module.
Use the ``ignore=True`` parameter to ignore this error:
.. code-block:: python
# This fails silently if a record with id=1 already exists
db["dogs"].insert({"id": 1, "name": "Cleo"}, pk="id", ignore=True)
To replace any existing records that have a matching primary key, use the ``replace=True`` parameter to ``.insert()`` or ``.insert_all()``:
.. code-block:: python
db["dogs"].insert_all([{
"id": 1,
"name": "Cleo",
"twitter": "cleopaws",
"age": 3,
"is_good_dog": True,
}, {
"id": 2,
"name": "Marnie",
"twitter": "MarnieTheDog",
"age": 16,
"is_good_dog": True,
}], pk="id", replace=True)
.. note::
Prior to sqlite-utils 2.0 the ``.upsert()`` and ``.upsert_all()`` methods worked the same way as ``.insert(replace=True)`` does today. See :ref:`python_api_upsert` for the new behaviour of those methods introduced in 2.0.
.. _python_api_update:
Updating a specific record
==========================
You can update a record by its primary key using ``table.update()``::
>>> db = sqlite_utils.Database("dogs.db")
>>> print(db["dogs"].get(1))
{'id': 1, 'age': 4, 'name': 'Cleo'}
>>> db["dogs"].update(1, {"age": 5})
>>> print(db["dogs"].get(1))
{'id': 1, 'age': 5, 'name': 'Cleo'}
The first argument to ``update()`` is the primary key. This can be a single value, or a tuple if that table has a compound primary key::
>>> db["compound_dogs"].update((5, 3), {"name": "Updated"})
The second argument is a dictionary of columns that should be updated, along with their new values.
You can cause any missing columns to be added automatically using ``alter=True``::
>>> db["dogs"].update(1, {"breed": "Mutt"}, alter=True)
.. _python_api_delete:
Deleting a specific record
==========================
You can delete a record using ``table.delete()``::
>>> db = sqlite_utils.Database("dogs.db")
>>> db["dogs"].delete(1)
The ``delete()`` method takes the primary key of the record. This can be a tuple of values if the row has a compound primary key::
>>> db["compound_dogs"].delete((5, 3))
.. _python_api_delete_where:
Deleting multiple records
=========================
You can delete all records in a table that match a specific WHERE statement using ``table.delete_where()``::
>>> db = sqlite_utils.Database("dogs.db")
>>> # Delete every dog with age less than 3
>>> with db.conn:
... db["dogs"].delete_where("age < ?", [3])
Calling ``table.delete_where()`` with no other arguments will delete every row in the table.
Pass ``analyze=True`` to run ``ANALYZE`` against the table after deleting the rows.
.. _python_api_upsert:
Upserting data
==============
Upserting allows you to insert records if they do not exist and update them if they DO exist, based on matching against their primary key.
For example, given the dogs database you could upsert the record for Cleo like so:
.. code-block:: python
db["dogs"].upsert({
"id": 1,
"name": "Cleo",
"twitter": "cleopaws",
"age": 4,
"is_good_dog": True,
}, pk="id", column_order=("id", "twitter", "name"))
If a record exists with id=1, it will be updated to match those fields. If it does not exist it will be created.
Any existing columns that are not referenced in the dictionary passed to ``.upsert()`` will be unchanged. If you want to replace a record entirely, use ``.insert(doc, replace=True)`` instead.
Note that the ``pk`` and ``column_order`` parameters here are optional if you are certain that the table has already been created. You should pass them if the table may not exist at the time the first upsert is performed.
An ``upsert_all()`` method is also available, which behaves like ``insert_all()`` but performs upserts instead.
.. note::
``.upsert()`` and ``.upsert_all()`` in sqlite-utils 1.x worked like ``.insert(..., replace=True)`` and ``.insert_all(..., replace=True)`` do in 2.x. See `issue #66 <https://github.com/simonw/sqlite-utils/issues/66>`__ for details of this change.
.. _python_api_old_upsert:
Alternative upserts using INSERT OR IGNORE
------------------------------------------
Upserts use ``INSERT INTO ... ON CONFLICT DO UPDATE SET``. Prior to ``sqlite-utils 4.0`` these used a sequence of ``INSERT OR IGNORE`` followed by an ``UPDATE``. This older method is still used for SQLite 3.23.1 and earlier. You can force the older implementation by passing ``use_old_upsert=True`` to the ``Database()`` constructor.
.. _python_api_convert:
Converting data in columns
==========================
The ``table.convert(...)`` method can be used to apply a conversion function to the values in a column, either to update that column or to populate new columns. It is the Python library equivalent of the :ref:`sqlite-utils convert <cli_convert>` command.
This feature works by registering a custom SQLite function that applies a Python transformation, then running a SQL query equivalent to ``UPDATE table SET column = convert_value(column);``
To transform a specific column to uppercase, you would use the following:
.. code-block:: python
db["dogs"].convert("name", lambda value: value.upper())
You can pass a list of columns, in which case the transformation will be applied to each one:
.. code-block:: python
db["dogs"].convert(["name", "twitter"], lambda value: value.upper())
To save the output of the transformation to a different column, use the ``output=`` parameter:
.. code-block:: python
db["dogs"].convert("name", lambda value: value.upper(), output="name_upper")
This will add the new column, if it does not already exist. You can pass ``output_type=int`` or some other type to control the type of the new column - otherwise it will default to text.
If you want to drop the original column after saving the results in a separate output column, pass ``drop=True``.
By default any rows with a falsey value for the column - such as ``0`` or ``None`` - will be skipped. Pass ``skip_false=False`` to disable this behaviour.
You can create multiple new columns from a single input column by passing ``multi=True`` and a conversion function that returns a Python dictionary. This example creates new ``upper`` and ``lower`` columns populated from the single ``title`` column:
.. code-block:: python
table.convert(
"title", lambda v: {"upper": v.upper(), "lower": v.lower()}, multi=True
)
The ``.convert()`` method accepts optional ``where=`` and ``where_args=`` parameters which can be used to apply the conversion to a subset of rows specified by a where clause. Here's how to apply the conversion only to rows with an ``id`` that is higher than 20:
.. code-block:: python
table.convert("title", lambda v: v.upper(), where="id > :id", where_args={"id": 20})
These behave the same as the corresponding parameters to the :ref:`.rows_where() <python_api_rows>` method, so you can use ``?`` placeholders and a list of values instead of ``:named`` placeholders with a dictionary.
.. _python_api_lookup_tables:
Working with lookup tables
==========================
A useful pattern when populating large tables is to break common values out into lookup tables. Consider a table of ``Trees``, where each tree has a species. Ideally these species would be split out into a separate ``Species`` table, with each one assigned an integer primary key that can be referenced from the ``Trees`` table ``species_id`` column.
.. _python_api_explicit_lookup_tables:
Creating lookup tables explicitly
---------------------------------
Calling ``db["Species"].lookup({"name": "Palm"})`` creates a table called ``Species`` (if one does not already exist) with two columns: ``id`` and ``name``. It sets up a unique constraint on the ``name`` column to guarantee it will not contain duplicate rows. It then inserts a new row with the ``name`` set to ``Palm`` and returns the new integer primary key value.
If the ``Species`` table already exists, it will insert the new row and return the primary key. If a row with that ``name`` already exists, it will return the corresponding primary key value directly.
If you call ``.lookup()`` against an existing table without the unique constraint it will attempt to add the constraint, raising an ``IntegrityError`` if the constraint cannot be created.
If you pass in a dictionary with multiple values, both values will be used to insert or retrieve the corresponding ID and any unique constraint that is created will cover all of those columns, for example:
.. code-block:: python
db["Trees"].insert({
"latitude": 49.1265976,
"longitude": 2.5496218,
"species": db["Species"].lookup({
"common_name": "Common Juniper",
"latin_name": "Juniperus communis"
})
})
The ``.lookup()`` method has an optional second argument which can be used to populate other columns in the table but only if the row does not exist yet. These columns will not be included in the unique index.
To create a species record with a note on when it was first seen, you can use this:
.. code-block:: python
db["Species"].lookup({"name": "Palm"}, {"first_seen": "2021-03-04"})
The first time this is called the record will be created for ``name="Palm"``. Any subsequent calls with that name will ignore the second argument, even if it includes different values.
``.lookup()`` also accepts keyword arguments, which are passed through to the :ref:`insert() method <python_api_creating_tables>` and can be used to influence the shape of the created table. Supported parameters are:
- ``pk`` - which defaults to ``id``
- ``foreign_keys``
- ``column_order``
- ``not_null``
- ``defaults``
- ``extracts``
- ``conversions``
- ``columns``
- ``strict``
.. _python_api_extracts:
Populating lookup tables automatically during insert/upsert
-----------------------------------------------------------
A more efficient way to work with lookup tables is to define them using the ``extracts=`` parameter, which is accepted by ``.insert()``, ``.upsert()``, ``.insert_all()``, ``.upsert_all()`` and by the ``.table(...)`` factory function.
``extracts=`` specifies columns which should be "extracted" out into a separate lookup table during the data insertion.
It can be either a list of column names, in which case the extracted table names will match the column names exactly, or it can be a dictionary mapping column names to the desired name of the extracted table.
To extract the ``species`` column out to a separate ``Species`` table, you can do this:
.. code-block:: python
# Using the table factory
trees = db.table("Trees", extracts={"species": "Species"})
trees.insert({
"latitude": 49.1265976,
"longitude": 2.5496218,
"species": "Common Juniper"
})
# If you want the table to be called 'species', you can do this:
trees = db.table("Trees", extracts=["species"])
# Using .insert() directly
db["Trees"].insert({
"latitude": 49.1265976,
"longitude": 2.5496218,
"species": "Common Juniper"
}, extracts={"species": "Species"})
.. _python_api_m2m:
Working with many-to-many relationships
=======================================
``sqlite-utils`` includes a shortcut for creating records using many-to-many relationships in the form of the ``table.m2m(...)`` method.
Here's how to create two new records and connect them via a many-to-many table in a single line of code:
.. code-block:: python
db["dogs"].insert({"id": 1, "name": "Cleo"}, pk="id").m2m(
"humans", {"id": 1, "name": "Natalie"}, pk="id"
)
Running this example actually creates three tables: ``dogs``, ``humans`` and a many-to-many ``dogs_humans`` table. It will insert a record into each of those tables.
The ``.m2m()`` method executes against the last record that was affected by ``.insert()`` or ``.update()`` - the record identified by the ``table.last_pk`` property. To execute ``.m2m()`` against a specific record you can first select it by passing its primary key to ``.update()``:
.. code-block:: python
db["dogs"].update(1).m2m(
"humans", {"id": 2, "name": "Simon"}, pk="id"
)
The first argument to ``.m2m()`` can be either the name of a table as a string or it can be the table object itself.
The second argument can be a single dictionary record or a list of dictionaries. These dictionaries will be passed to ``.upsert()`` against the specified table.
Here's alternative code that creates the dog record and adds two people to it:
.. code-block:: python
db = Database(memory=True)
dogs = db.table("dogs", pk="id")
humans = db.table("humans", pk="id")
dogs.insert({"id": 1, "name": "Cleo"}).m2m(
humans, [
{"id": 1, "name": "Natalie"},
{"id": 2, "name": "Simon"}
]
)
The method will attempt to find an existing many-to-many table by looking for a table that has foreign key relationships against both of the tables in the relationship.
If it cannot find such a table, it will create a new one using the names of the two tables - ``dogs_humans`` in this example. You can customize the name of this table using the ``m2m_table=`` argument to ``.m2m()``.
If it finds multiple candidate tables with foreign keys to both of the specified tables it will raise a ``sqlite_utils.db.NoObviousTable`` exception. You can avoid this error by specifying the correct table using ``m2m_table=``.
The ``.m2m()`` method also takes an optional ``pk=`` argument to specify the primary key that should be used if the table is created, and an optional ``alter=True`` argument to specify that any missing columns of an existing table should be added if they are needed.
.. _python_api_m2m_lookup:
Using m2m and lookup tables together
------------------------------------
You can work with (or create) lookup tables as part of a call to ``.m2m()`` using the ``lookup=`` parameter. This accepts the same argument as ``table.lookup()`` does - a dictionary of values that should be used to lookup or create a row in the lookup table.
This example creates a dogs table, populates it, creates a characteristics table, populates that and sets up a many-to-many relationship between the two. It chains ``.m2m()`` twice to create two associated characteristics:
.. code-block:: python
db = Database(memory=True)
dogs = db.table("dogs", pk="id")
dogs.insert({"id": 1, "name": "Cleo"}).m2m(
"characteristics", lookup={
"name": "Playful"
}
).m2m(
"characteristics", lookup={
"name": "Opinionated"
}
)
You can inspect the database to see the results like this::
>>> db.table_names()
['dogs', 'characteristics', 'characteristics_dogs']
>>> list(db["dogs"].rows)
[{'id': 1, 'name': 'Cleo'}]
>>> list(db["characteristics"].rows)
[{'id': 1, 'name': 'Playful'}, {'id': 2, 'name': 'Opinionated'}]
>>> list(db["characteristics_dogs"].rows)
[{'characteristics_id': 1, 'dogs_id': 1}, {'characteristics_id': 2, 'dogs_id': 1}]
>>> print(db["characteristics_dogs"].schema)
CREATE TABLE [characteristics_dogs] (
[characteristics_id] INTEGER REFERENCES [characteristics]([id]),
[dogs_id] INTEGER REFERENCES [dogs]([id]),
PRIMARY KEY ([characteristics_id], [dogs_id])
)
.. _python_api_analyze_column:
Analyzing a column
==================
The ``table.analyze_column(column)`` method is used by the :ref:`analyze-tables <cli_analyze_tables>` CLI command.
It takes the following arguments and options:
``column`` - required
The name of the column to analyze
``common_limit``
The number of most common values to return. Defaults to 10.
``value_truncate``
If set to an integer, values longer than this will be truncated to this length. Defaults to None.
``most_common``
If set to False, the ``most_common`` field of the returned ``ColumnDetails`` will be set to None. Defaults to True.
``least_common``
If set to False, the ``least_common`` field of the returned ``ColumnDetails`` will be set to None. Defaults to True.
And returns a ``ColumnDetails`` named tuple with the following fields:
``table``
The name of the table
``column``
The name of the column
``total_rows``
The total number of rows in the table
``num_null``
The number of rows for which this column is null
``num_blank``
The number of rows for which this column is blank (the empty string)
``num_distinct``
The number of distinct values in this column
``most_common``
The ``N`` most common values as a list of ``(value, count)`` tuples, or ``None`` if the table consists entirely of distinct values
``least_common``
The ``N`` least common values as a list of ``(value, count)`` tuples, or ``None`` if the table is entirely distinct or if the number of distinct values is less than N (since they will already have been returned in ``most_common``)
.. _python_api_add_column:
Adding columns
==============
You can add a new column to a table using the ``.add_column(col_name, col_type)`` method:
.. code-block:: python
db["dogs"].add_column("instagram", str)
db["dogs"].add_column("weight", float)
db["dogs"].add_column("dob", datetime.date)
db["dogs"].add_column("image", "BLOB")
db["dogs"].add_column("website") # str by default
You can specify the ``col_type`` argument either using a SQLite type as a string, or by directly passing a Python type e.g. ``str`` or ``float``.
The ``col_type`` is optional - if you omit it the type of ``TEXT`` will be used.
SQLite types you can specify are ``"TEXT"``, ``"INTEGER"``, ``"FLOAT"`` or ``"BLOB"``.
If you pass a Python type, it will be mapped to SQLite types as shown here::
float: "FLOAT"
int: "INTEGER"
bool: "INTEGER"
str: "TEXT"
bytes: "BLOB"
datetime.datetime: "TEXT"
datetime.date: "TEXT"
datetime.time: "TEXT"
datetime.timedelta: "TEXT"
# If numpy is installed
np.int8: "INTEGER"
np.int16: "INTEGER"
np.int32: "INTEGER"
np.int64: "INTEGER"
np.uint8: "INTEGER"
np.uint16: "INTEGER"
np.uint32: "INTEGER"
np.uint64: "INTEGER"
np.float16: "FLOAT"
np.float32: "FLOAT"
np.float64: "FLOAT"
.. note::
In sqlite-utils 3.x ``FLOAT`` is used for floating point columns when the correct column type is actually ``REAL``. If you specify ``strict=True`` tables created in strict mode will use the correct column type of ``REAL`` instead. We plan to change this behavior in ``sqlite-utils`` 4.x to always use ``REAL``, but this will represent a minor breaking change and so is being held for the next major release, see issue :issue:`645`.
You can also add a column that is a foreign key reference to another table using the ``fk`` parameter:
.. code-block:: python
db["dogs"].add_column("species_id", fk="species")
This will automatically detect the name of the primary key on the species table and use that (and its type) for the new column.
You can explicitly specify the column you wish to reference using ``fk_col``:
.. code-block:: python
db["dogs"].add_column("species_id", fk="species", fk_col="ref")
You can set a ``NOT NULL DEFAULT 'x'`` constraint on the new column using ``not_null_default``:
.. code-block:: python
db["dogs"].add_column("friends_count", int, not_null_default=0)
.. _python_api_add_column_alter:
Adding columns automatically on insert/update
=============================================
You can insert or update data that includes new columns and have the table automatically altered to fit the new schema using the ``alter=True`` argument. This can be passed to all four of ``.insert()``, ``.upsert()``, ``.insert_all()`` and ``.upsert_all()``, or it can be passed to ``db.table(table_name, alter=True)`` to enable it by default for all method calls against that table instance.
.. code-block:: python
db["new_table"].insert({"name": "Gareth"})
# This will throw an exception:
db["new_table"].insert({"name": "Gareth", "age": 32})
# This will succeed and add a new "age" integer column:
db["new_table"].insert({"name": "Gareth", "age": 32}, alter=True)
# You can confirm the new column like so:
print(db["new_table"].columns_dict)
# Outputs this:
# {'name': <class 'str'>, 'age': <class 'int'>}
# This works too:
new_table = db.table("new_table", alter=True)
new_table.insert({"name": "Gareth", "age": 32, "shoe_size": 11})
.. _python_api_add_foreign_key:
Adding foreign key constraints
==============================
The SQLite ``ALTER TABLE`` statement doesn't have the ability to add foreign key references to an existing column.
The ``add_foreign_key()`` method here is a convenient wrapper around :ref:`table.transform() <python_api_transform>`.
It's also possible to add foreign keys by directly updating the ``sqlite_master`` table. The `sqlite-utils-fast-fks <https://github.com/simonw/sqlite-utils-fast-fks>`__ plugin implements this pattern, using code that was included with ``sqlite-utils`` prior to version 3.35.
Here's an example of this mechanism in action:
.. code-block:: python
db["authors"].insert_all([
{"id": 1, "name": "Sally"},
{"id": 2, "name": "Asheesh"}
], pk="id")
db["books"].insert_all([
{"title": "Hedgehogs of the world", "author_id": 1},
{"title": "How to train your wolf", "author_id": 2},
])
db["books"].add_foreign_key("author_id", "authors", "id")
The ``table.add_foreign_key(column, other_table, other_column)`` method takes the name of the column, the table that is being referenced and the key column within that other table. If you omit the ``other_column`` argument the primary key from that table will be used automatically. If you omit the ``other_table`` argument the table will be guessed based on some simple rules:
- If the column is of format ``author_id``, look for tables called ``author`` or ``authors``
- If the column does not end in ``_id``, try looking for a table with the exact name of the column or that name with an added ``s``
This method first checks that the specified foreign key references tables and columns that exist and does not clash with an existing foreign key. It will raise a ``sqlite_utils.db.AlterError`` exception if these checks fail.
To ignore the case where the key already exists, use ``ignore=True``:
.. code-block:: python
db["books"].add_foreign_key("author_id", "authors", "id", ignore=True)
.. _python_api_add_foreign_keys:
Adding multiple foreign key constraints at once
-----------------------------------------------
You can use ``db.add_foreign_keys(...)`` to add multiple foreign keys in one go. This method takes a list of four-tuples, each one specifying a ``table``, ``column``, ``other_table`` and ``other_column``.
Here's an example adding two foreign keys at once:
.. code-block:: python
db.add_foreign_keys([
("dogs", "breed_id", "breeds", "id"),
("dogs", "home_town_id", "towns", "id")
])
This method runs the same checks as ``.add_foreign_key()`` and will raise ``sqlite_utils.db.AlterError`` if those checks fail.
.. _python_api_index_foreign_keys:
Adding indexes for all foreign keys
-----------------------------------
If you want to ensure that every foreign key column in your database has a corresponding index, you can do so like this:
.. code-block:: python
db.index_foreign_keys()
.. _python_api_drop:
Dropping a table or view
========================
You can drop a table or view using the ``.drop()`` method:
.. code-block:: python
db["my_table"].drop()
Pass ``ignore=True`` if you want to ignore the error caused by the table or view not existing.
.. code-block:: python
db["my_table"].drop(ignore=True)
.. _python_api_transform:
Transforming a table
====================
The SQLite ``ALTER TABLE`` statement is limited. It can add and drop columns and rename tables, but it cannot change column types, change ``NOT NULL`` status or change the primary key for a table.
The ``table.transform()`` method can do all of these things, by implementing a multi-step pattern `described in the SQLite documentation <https://www.sqlite.org/lang_altertable.html#otheralter>`__:
1. Start a transaction
2. ``CREATE TABLE tablename_new_x123`` with the required changes
3. Copy the old data into the new table using ``INSERT INTO tablename_new_x123 SELECT * FROM tablename;``
4. ``DROP TABLE tablename;``
5. ``ALTER TABLE tablename_new_x123 RENAME TO tablename;``
6. Commit the transaction
The ``.transform()`` method takes a number of parameters, all of which are optional.
As a bonus, calling ``.transform()`` will reformat the schema for the table that is stored in SQLite to make it more readable. This works even if you call it without any arguments.
To keep the original table around instead of dropping it, pass the ``keep_table=`` option and specify the name of the table you would like it to be renamed to:
.. code-block:: python
table.transform(types={"age": int}, keep_table="original_table")
This method raises a ``sqlite_utils.db.TransformError`` exception if the table cannot be transformed, usually because there are existing constraints or indexes that are incompatible with modifications to the columns.
.. _python_api_transform_alter_column_types:
Altering column types
---------------------
To alter the type of a column, use the ``types=`` argument:
.. code-block:: python
# Convert the 'age' column to an integer, and 'weight' to a float
table.transform(types={"age": int, "weight": float})
See :ref:`python_api_add_column` for a list of available types.
.. _python_api_transform_rename_columns:
Renaming columns
----------------
The ``rename=`` parameter can rename columns:
.. code-block:: python
# Rename 'age' to 'initial_age':
table.transform(rename={"age": "initial_age"})
.. _python_api_transform_drop_columns:
Dropping columns
----------------
To drop columns, pass them in the ``drop=`` set:
.. code-block:: python
# Drop the 'age' column:
table.transform(drop={"age"})
.. _python_api_transform_change_primary_keys:
Changing primary keys
---------------------
To change the primary key for a table, use ``pk=``. This can be passed a single column for a regular primary key, or a tuple of columns to create a compound primary key. Passing ``pk=None`` will remove the primary key and convert the table into a ``rowid`` table.
.. code-block:: python
# Make `user_id` the new primary key
table.transform(pk="user_id")
.. _python_api_transform_change_not_null:
Changing not null status
------------------------
You can change the ``NOT NULL`` status of columns by using ``not_null=``. You can pass this a set of columns to make those columns ``NOT NULL``:
.. code-block:: python
# Make the 'age' and 'weight' columns NOT NULL
table.transform(not_null={"age", "weight"})
If you want to take existing ``NOT NULL`` columns and change them to allow null values, you can do so by passing a dictionary of true/false values instead:
.. code-block:: python
# 'age' is NOT NULL but we want to allow NULL:
table.transform(not_null={"age": False})
# Make age allow NULL and switch weight to being NOT NULL:
table.transform(not_null={"age": False, "weight": True})
.. _python_api_transform_alter_column_defaults:
Altering column defaults
------------------------
The ``defaults=`` parameter can be used to set or change the defaults for different columns:
.. code-block:: python
# Set default age to 1:
table.transform(defaults={"age": 1})
# Now remove the default from that column:
table.transform(defaults={"age": None})
.. _python_api_transform_change_column_order:
Changing column order
---------------------
The ``column_order=`` parameter can be used to change the order of the columns. If you pass the names of a subset of the columns those will go first and columns you omitted will appear in their existing order after them.
.. code-block:: python
# Change column order
table.transform(column_order=("name", "age", "id"))
.. _python_api_transform_add_foreign_key_constraints:
Adding foreign key constraints
------------------------------
You can add one or more foreign key constraints to a table using the ``add_foreign_keys=`` parameter:
.. code-block:: python
db["places"].transform(
add_foreign_keys=(
("country", "country", "id"),
("continent", "continent", "id")
)
)
This accepts the same arguments described in :ref:`specifying foreign keys <python_api_foreign_keys>` - so you can specify them as a full tuple of ``(column, other_table, other_column)``, or you can take a shortcut and pass just the name of the column, provided the table can be automatically derived from the column name:
.. code-block:: python
db["places"].transform(
add_foreign_keys=(("country", "continent"))
)
.. _python_api_transform_replace_foreign_key_constraints:
Replacing foreign key constraints
---------------------------------
The ``foreign_keys=`` parameter is similar to ``add_foreign_keys=`` but can be used to replace all foreign key constraints on a table, dropping any that are not explicitly mentioned:
.. code-block:: python
db["places"].transform(
foreign_keys=(
("continent", "continent", "id"),
)
)
.. _python_api_transform_drop_foreign_key_constraints:
Dropping foreign key constraints
--------------------------------
You can use ``.transform()`` to remove foreign key constraints from a table.
This example drops two foreign keys - the one from ``places.country`` to ``country.id`` and the one from ``places.continent`` to ``continent.id``:
.. code-block:: python
db["places"].transform(
drop_foreign_keys=("country", "continent")
)
.. _python_api_transform_sql:
Custom transformations with .transform_sql()
--------------------------------------------
The ``.transform()`` method can handle most cases, but it does not automatically upgrade indexes, views or triggers associated with the table that is being transformed.
If you want to do something more advanced, you can call the ``table.transform_sql(...)`` method with the same arguments that you would have passed to ``table.transform(...)``.
This method will return a list of SQL statements that should be executed to implement the change. You can then make modifications to that SQL - or add additional SQL statements - before executing it yourself.
.. _python_api_extract:
Extracting columns into a separate table
========================================
The ``table.extract()`` method can be used to extract specified columns into a separate table.
Imagine a ``Trees`` table that looks like this:
=== ============ =======
id TreeAddress Species
=== ============ =======
1 52 Vine St Palm
2 12 Draft St Oak
3 51 Dark Ave Palm
4 1252 Left St Palm
=== ============ =======
The ``Species`` column contains duplicate values. This database could be improved by extracting that column out into a separate ``Species`` table and pointing to it using a foreign key column.
The schema of the above table is:
.. code-block:: sql
CREATE TABLE [Trees] (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[Species] TEXT
)
Here's how to extract the ``Species`` column using ``.extract()``:
.. code-block:: python
db["Trees"].extract("Species")
After running this code the table schema now looks like this:
.. code-block:: sql
CREATE TABLE "Trees" (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[Species_id] INTEGER,
FOREIGN KEY(Species_id) REFERENCES Species(id)
)
A new ``Species`` table will have been created with the following schema:
.. code-block:: sql
CREATE TABLE [Species] (
[id] INTEGER PRIMARY KEY,
[Species] TEXT
)
The ``.extract()`` method defaults to creating a table with the same name as the column that was extracted, and adding a foreign key column called ``tablename_id``.
You can specify a custom table name using ``table=``, and a custom foreign key name using ``fk_column=``. This example creates a table called ``tree_species`` and a foreign key column called ``tree_species_id``:
.. code-block:: python
db["Trees"].extract("Species", table="tree_species", fk_column="tree_species_id")
The resulting schema looks like this:
.. code-block:: sql
CREATE TABLE "Trees" (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[tree_species_id] INTEGER,
FOREIGN KEY(tree_species_id) REFERENCES tree_species(id)
)
CREATE TABLE [tree_species] (
[id] INTEGER PRIMARY KEY,
[Species] TEXT
)
You can also extract multiple columns into the same external table. Say for example you have a table like this:
=== ============ ========== =========
id TreeAddress CommonName LatinName
=== ============ ========== =========
1 52 Vine St Palm Arecaceae
2 12 Draft St Oak Quercus
3 51 Dark Ave Palm Arecaceae
4 1252 Left St Palm Arecaceae
=== ============ ========== =========
You can pass ``["CommonName", "LatinName"]`` to ``.extract()`` to extract both of those columns:
.. code-block:: python
db["Trees"].extract(["CommonName", "LatinName"])
This produces the following schema:
.. code-block:: sql
CREATE TABLE "Trees" (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[CommonName_LatinName_id] INTEGER,
FOREIGN KEY(CommonName_LatinName_id) REFERENCES CommonName_LatinName(id)
)
CREATE TABLE [CommonName_LatinName] (
[id] INTEGER PRIMARY KEY,
[CommonName] TEXT,
[LatinName] TEXT
)
The table name ``CommonName_LatinName`` is derived from the extract columns. You can use ``table=`` and ``fk_column=`` to specify custom names like this:
.. code-block:: python
db["Trees"].extract(["CommonName", "LatinName"], table="Species", fk_column="species_id")
This produces the following schema:
.. code-block:: sql
CREATE TABLE "Trees" (
[id] INTEGER PRIMARY KEY,
[TreeAddress] TEXT,
[species_id] INTEGER,
FOREIGN KEY(species_id) REFERENCES Species(id)
)
CREATE TABLE [Species] (
[id] INTEGER PRIMARY KEY,
[CommonName] TEXT,
[LatinName] TEXT
)
You can use the ``rename=`` argument to rename columns in the lookup table. To create a ``Species`` table with columns called ``name`` and ``latin`` you can do this:
.. code-block:: python
db["Trees"].extract(
["CommonName", "LatinName"],
table="Species",
fk_column="species_id",
rename={"CommonName": "name", "LatinName": "latin"}
)
This produces a lookup table like so:
.. code-block:: sql
CREATE TABLE [Species] (
[id] INTEGER PRIMARY KEY,
[name] TEXT,
[latin] TEXT
)
.. _python_api_hash:
Setting an ID based on the hash of the row contents
===================================================
Sometimes you will find yourself working with a dataset that includes rows that do not have a provided obvious ID, but where you would like to assign one so that you can later upsert into that table without creating duplicate records.
In these cases, a useful technique is to create an ID that is derived from the sha1 hash of the row contents.
``sqlite-utils`` can do this for you using the ``hash_id=`` option. For example::
db = sqlite_utils.Database("dogs.db")
db["dogs"].upsert({"name": "Cleo", "twitter": "cleopaws"}, hash_id="id")
print(list(db["dogs"].rows))
Outputs::
[{'id': 'f501265970505d9825d8d9f590bfab3519fb20b1', 'name': 'Cleo', 'twitter': 'cleopaws'}]
If you are going to use that ID straight away, you can access it using ``last_pk``::
dog_id = db["dogs"].upsert({
"name": "Cleo",
"twitter": "cleopaws"
}, hash_id="id").last_pk
# dog_id is now "f501265970505d9825d8d9f590bfab3519fb20b1"
The hash will be created using all of the column values. To create a hash using a subset of the columns, pass the ``hash_id_columns=`` parameter::
db["dogs"].upsert(
{"name": "Cleo", "twitter": "cleopaws", "age": 7},
hash_id_columns=("name", "twitter")
)
The ``hash_id=`` parameter is optional if you specify ``hash_id_columns=`` - it will default to putting the hash in a column called ``id``.
You can manually calculate these hashes using the :ref:`hash_record(record, keys=...) <reference_utils_hash_record>` utility function.
.. _python_api_create_view:
Creating views
==============
The ``.create_view()`` method on the database class can be used to create a view:
.. code-block:: python
db.create_view("good_dogs", """
select * from dogs where is_good_dog = 1
""")
This will raise a ``sqlite_utils.utils.OperationalError`` if a view with that name already exists.
You can pass ``ignore=True`` to silently ignore an existing view and do nothing, or ``replace=True`` to replace an existing view with a new definition if your select statement differs from the current view:
.. code-block:: python
db.create_view("good_dogs", """
select * from dogs where is_good_dog = 1
""", replace=True)
Storing JSON
============
SQLite has `excellent JSON support <https://www.sqlite.org/json1.html>`_, and ``sqlite-utils`` can help you take advantage of this: if you attempt to insert a value that can be represented as a JSON list or dictionary, ``sqlite-utils`` will create a TEXT column and store your data as serialized JSON. This means you can quickly store even complex data structures in SQLite and query them using JSON features.
For example:
.. code-block:: python
db["niche_museums"].insert({
"name": "The Bigfoot Discovery Museum",
"url": "http://bigfootdiscoveryproject.com/",
"hours": {
"Monday": [11, 18],
"Wednesday": [11, 18],
"Thursday": [11, 18],
"Friday": [11, 18],
"Saturday": [11, 18],
"Sunday": [11, 18]
},
"address": {
"streetAddress": "5497 Highway 9",
"addressLocality": "Felton, CA",
"postalCode": "95018"
}
})
db.execute("""
select json_extract(address, '$.addressLocality')
from niche_museums
""").fetchall()
# Returns [('Felton, CA',)]
.. _python_api_conversions:
Converting column values using SQL functions
============================================
Sometimes it can be useful to run values through a SQL function prior to inserting them. A simple example might be converting a value to upper case while it is being inserted.
The ``conversions={...}`` parameter can be used to specify custom SQL to be used as part of an ``INSERT`` or ``UPDATE`` SQL statement.
You can specify an upper case conversion for a specific column like so:
.. code-block:: python
db["example"].insert({
"name": "The Bigfoot Discovery Museum"
}, conversions={"name": "upper(?)"})
# list(db["example"].rows) now returns:
# [{'name': 'THE BIGFOOT DISCOVERY MUSEUM'}]
The dictionary key is the column name to be converted. The value is the SQL fragment to use, with a ``?`` placeholder for the original value.
A more useful example: if you are working with `SpatiaLite <https://www.gaia-gis.it/fossil/libspatialite/index>`__ you may find yourself wanting to create geometry values from a WKT value. Code to do that could look like this:
.. code-block:: python
import sqlite3
import sqlite_utils
from shapely.geometry import shape
import httpx
db = sqlite_utils.Database("places.db")
# Initialize SpatiaLite
db.init_spatialite()
# Use sqlite-utils to create a places table
places = db["places"].create({"id": int, "name": str})
# Add a SpatiaLite 'geometry' column
places.add_geometry_column("geometry", "MULTIPOLYGON")
# Fetch some GeoJSON from Who's On First:
geojson = httpx.get(
"https://raw.githubusercontent.com/whosonfirst-data/"
"whosonfirst-data-admin-gb/master/data/404/227/475/404227475.geojson"
).json()
# Convert to "Well Known Text" format using shapely
wkt = shape(geojson["geometry"]).wkt
# Insert the record, converting the WKT to a SpatiaLite geometry:
db["places"].insert(
{"name": "Wales", "geometry": wkt},
conversions={"geometry": "GeomFromText(?, 4326)"},
)
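This example uses geographical data from `Who's On First <https://whosonfirst.org/>`__ and depends on the `Shapely <https://shapely.readthedocs.io/en/stable/manual.html>`__ and `HTTPX <https://www.python-httpx.org/>`__ Python libraries.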
.. _python_api_sqlite_version:
Checking the SQLite version
===========================
The ``db.sqlite_version`` property returns a tuple of integers representing the version of SQLite used for that database object::
>>> db.sqlite_version
(3, 36, 0)
.. _python_api_itedump:
Dumping the database to SQL
===========================
The ``db.iterdump()`` method returns a sequence of SQL strings representing a complete dump of the database. Use it like this:
.. code-block:: python
full_sql = "".join(db.iterdump())
This uses the `sqlite3.Connection.iterdump() <https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.iterdump>`__ method.
If you are using ``pysqlite3`` or ``sqlean.py`` the underlying method may be missing. If you install the `sqlite-dump <https://pypi.org/project/sqlite-dump/>`__ package then the ``db.iterdump()`` method will use that implementation instead:
.. code-block:: bash
pip install sqlite-dump
.. _python_api_introspection:
Introspecting tables and views
==============================
If you have loaded an existing table or view, you can use introspection to find out more about it::
>>> db["PlantType"]
<Table PlantType (id, value)>
.. _python_api_introspection_exists:
.exists()
---------
The ``.exists()`` method can be used to find out if a table exists or not::
>>> db["PlantType"].exists()
True
>>> db["PlantType2"].exists()
False
.. _python_api_introspection_count:
.count
------
The ``.count`` property shows the current number of rows (``select count(*) from table``)::
>>> db["PlantType"].count
3
>>> db["Street_Tree_List"].count
189144
This property will take advantage of :ref:`python_api_cached_table_counts` if the ``use_counts_table`` property is set on the database. You can avoid that optimization entirely by calling ``table.count_where()`` instead of accessing the property.
.. _python_api_introspection_columns:
.columns
--------
The ``.columns`` property shows the columns in the table or view. It returns a list of ``Column(cid, name, type, notnull, default_value, is_pk)`` named tuples.
::
>>> db["PlantType"].columns
[Column(cid=0, name='id', type='INTEGER', notnull=0, default_value=None, is_pk=1),
Column(cid=1, name='value', type='TEXT', notnull=0, default_value=None, is_pk=0)]
.. _python_api_introspection_columns_dict:
.columns_dict
-------------
The ``.columns_dict`` property returns a dictionary version of the columns with just the names and Python types::
>>> db["PlantType"].columns_dict
{'id': <class 'int'>, 'value': <class 'str'>}
.. _python_api_introspection_default_values:
.default_values
---------------
The ``.default_values`` property returns a dictionary of default values for each column that has a default::
>>> db["table_with_defaults"].default_values
{'score': 5}
.. _python_api_introspection_pks:
.pks
----
The ``.pks`` property returns a list of strings naming the primary key columns for the table::
>>> db["PlantType"].pks
['id']
If a table has no primary keys but is a `rowid table <https://www.sqlite.org/rowidtable.html>`__, this property will return ``['rowid']``.
.. _python_api_introspection_use_rowid:
.use_rowid
----------
Almost all SQLite tables have a ``rowid`` column, but a table with no explicitly defined primary keys must use that ``rowid`` as the primary key for identifying individual rows. The ``.use_rowid`` property checks to see if a table needs to use the ``rowid`` in this way - it returns ``True`` if the table has no explicitly defined primary keys and ``False`` otherwise.
>>> db["PlantType"].use_rowid
False
.. _python_api_introspection_foreign_keys:
.foreign_keys
-------------
The ``.foreign_keys`` property returns any foreign key relationships for the table, as a list of ``ForeignKey(table, column, other_table, other_column)`` named tuples. It is not available on views.
::
>>> db["Street_Tree_List"].foreign_keys
[ForeignKey(table='Street_Tree_List', column='qLegalStatus', other_table='qLegalStatus', other_column='id'),
ForeignKey(table='Street_Tree_List', column='qCareAssistant', other_table='qCareAssistant', other_column='id'),
ForeignKey(table='Street_Tree_List', column='qSiteInfo', other_table='qSiteInfo', other_column='id'),
ForeignKey(table='Street_Tree_List', column='qSpecies', other_table='qSpecies', other_column='id'),
ForeignKey(table='Street_Tree_List', column='qCaretaker', other_table='qCaretaker', other_column='id'),
ForeignKey(table='Street_Tree_List', column='PlantType', other_table='PlantType', other_column='id')]
.. _python_api_introspection_schema:
.schema
-------
The ``.schema`` property outputs the table's schema as a SQL string::
>>> print(db["Street_Tree_List"].schema)
CREATE TABLE "Street_Tree_List" (
"TreeID" INTEGER,
"qLegalStatus" INTEGER,
"qSpecies" INTEGER,
"qAddress" TEXT,
"SiteOrder" INTEGER,
"qSiteInfo" INTEGER,
"PlantType" INTEGER,
"qCaretaker" INTEGER,
"qCareAssistant" INTEGER,
"PlantDate" TEXT,
"DBH" INTEGER,
"PlotSize" TEXT,
"PermitNotes" TEXT,
"XCoord" REAL,
"YCoord" REAL,
"Latitude" REAL,
"Longitude" REAL,
"Location" TEXT
,
FOREIGN KEY ("PlantType") REFERENCES [PlantType](id),
FOREIGN KEY ("qCaretaker") REFERENCES [qCaretaker](id),
FOREIGN KEY ("qSpecies") REFERENCES [qSpecies](id),
FOREIGN KEY ("qSiteInfo") REFERENCES [qSiteInfo](id),
FOREIGN KEY ("qCareAssistant") REFERENCES [qCareAssistant](id),
FOREIGN KEY ("qLegalStatus") REFERENCES [qLegalStatus](id))
.. _python_api_introspection_strict:
.strict
-------
The ``.strict`` property identifies if the table is a `SQLite STRICT table <https://www.sqlite.org/stricttables.html>`__.
::
>>> db["ny_times_us_counties"].strict
False
.. _python_api_introspection_indexes:
.indexes
--------
The ``.indexes`` property returns all indexes created for a table, as a list of ``Index(seq, name, unique, origin, partial, columns)`` named tuples. It is not available on views.
::
>>> db["Street_Tree_List"].indexes
[Index(seq=0, name='"Street_Tree_List_qLegalStatus"', unique=0, origin='c', partial=0, columns=['qLegalStatus']),
Index(seq=1, name='"Street_Tree_List_qCareAssistant"', unique=0, origin='c', partial=0, columns=['qCareAssistant']),
Index(seq=2, name='"Street_Tree_List_qSiteInfo"', unique=0, origin='c', partial=0, columns=['qSiteInfo']),
Index(seq=3, name='"Street_Tree_List_qSpecies"', unique=0, origin='c', partial=0, columns=['qSpecies']),
Index(seq=4, name='"Street_Tree_List_qCaretaker"', unique=0, origin='c', partial=0, columns=['qCaretaker']),
Index(seq=5, name='"Street_Tree_List_PlantType"', unique=0, origin='c', partial=0, columns=['PlantType'])]
.. _python_api_introspection_xindexes:
.xindexes
---------
The ``.xindexes`` property returns more detailed information about the indexes on the table, using the SQLite `PRAGMA index_xinfo() <https://sqlite.org/pragma.html#pragma_index_xinfo>`__ mechanism. It returns a list of ``XIndex(name, columns)`` named tuples, where ``columns`` is a list of ``XIndexColumn(seqno, cid, name, desc, coll, key)`` named tuples.
::
>>> db["ny_times_us_counties"].xindexes
[
XIndex(
name='idx_ny_times_us_counties_date',
columns=[
XIndexColumn(seqno=0, cid=0, name='date', desc=1, coll='BINARY', key=1),
XIndexColumn(seqno=1, cid=-1, name=None, desc=0, coll='BINARY', key=0)
]
),
XIndex(
name='idx_ny_times_us_counties_fips',
columns=[
XIndexColumn(seqno=0, cid=3, name='fips', desc=0, coll='BINARY', key=1),
XIndexColumn(seqno=1, cid=-1, name=None, desc=0, coll='BINARY', key=0)
]
)
]
.. _python_api_introspection_triggers:
.triggers
---------
The ``.triggers`` property lists database triggers. It can be used on both database and table objects. It returns a list of ``Trigger(name, table, sql)`` named tuples.
::
>>> db["authors"].triggers
[Trigger(name='authors_ai', table='authors', sql='CREATE TRIGGER [authors_ai] AFTER INSERT...'),
Trigger(name='authors_ad', table='authors', sql="CREATE TRIGGER [authors_ad] AFTER DELETE..."),
Trigger(name='authors_au', table='authors', sql="CREATE TRIGGER [authors_au] AFTER UPDATE")]
>>> db.triggers
... similar output to db["authors"].triggers
.. _python_api_introspection_triggers_dict:
.triggers_dict
--------------
The ``.triggers_dict`` property returns the triggers for that table as a dictionary mapping their names to their SQL definitions.
::
>>> db["authors"].triggers_dict
{'authors_ai': 'CREATE TRIGGER [authors_ai] AFTER INSERT...',
'authors_ad': 'CREATE TRIGGER [authors_ad] AFTER DELETE...',
'authors_au': 'CREATE TRIGGER [authors_au] AFTER UPDATE'}
The same property exists on the database, and will return all triggers across all tables:
::
>>> db.triggers_dict
{'authors_ai': 'CREATE TRIGGER [authors_ai] AFTER INSERT...',
'authors_ad': 'CREATE TRIGGER [authors_ad] AFTER DELETE...',
'authors_au': 'CREATE TRIGGER [authors_au] AFTER UPDATE'}
.. _python_api_introspection_detect_fts:
.detect_fts()
-------------
The ``detect_fts()`` method returns the associated SQLite FTS table name, if one exists for this table. If the table has not been configured for full-text search it returns ``None``.
::
>>> db["authors"].detect_fts()
"authors_fts"
.. _python_api_introspection_virtual_table_using:
.virtual_table_using
--------------------
The ``.virtual_table_using`` property reveals if a table is a virtual table. It returns ``None`` for regular tables and the upper case version of the type of virtual table otherwise. For example::
>>> db["authors"].enable_fts(["name"])
>>> db["authors_fts"].virtual_table_using
"FTS5"
.. _python_api_introspection_has_counts_triggers:
.has_counts_triggers
--------------------
The ``.has_counts_triggers`` property shows if a table has been configured with triggers for updating a ``_counts`` table, as described in :ref:`python_api_cached_table_counts`.
::
>>> db["authors"].has_counts_triggers
False
>>> db["authors"].enable_counts()
>>> db["authors"].has_counts_triggers
True
.. _python_api_introspection_supports_strict:
db.supports_strict
------------------
This property on the database object returns ``True`` if the available SQLite version supports `STRICT mode <https://www.sqlite.org/stricttables.html>`__, which was added in SQLite 3.37.0 (on 2021-11-27).
::
>>> db.supports_strict
True
.. _python_api_fts:
Full-text search
================
SQLite includes bundled extensions that implement `powerful full-text search <https://www.sqlite.org/fts5.html>`__.
.. _python_api_fts_enable:
Enabling full-text search for a table
-------------------------------------
You can enable full-text search on a table using ``.enable_fts(columns)``:
.. code-block:: python
db["dogs"].enable_fts(["name", "twitter"])
You can then run searches using the ``.search()`` method:
.. code-block:: python
rows = list(db["dogs"].search("cleo"))
This method returns a generator that can be looped over to get dictionaries for each row, similar to :ref:`python_api_rows`.
If you insert additional records into the table you will need to refresh the search index using ``populate_fts()``:
.. code-block:: python
db["dogs"].insert({
"id": 2,
"name": "Marnie",
"twitter": "MarnieTheDog",
"age": 16,
"is_good_dog": True,
}, pk="id")
db["dogs"].populate_fts(["name", "twitter"])
A better solution is to use database triggers. You can set up database triggers to automatically update the full-text index using ``create_triggers=True``:
.. code-block:: python
db["dogs"].enable_fts(["name", "twitter"], create_triggers=True)
``.enable_fts()`` defaults to using `FTS5 <https://www.sqlite.org/fts5.html>`__. If you wish to use `FTS4 <https://www.sqlite.org/fts3.html>`__ instead, use the following:
.. code-block:: python
db["dogs"].enable_fts(["name", "twitter"], fts_version="FTS4")
You can customize the tokenizer configured for the table using the ``tokenize=`` parameter. For example, to enable Porter stemming, where English words like "running" will match stemmed alternatives such as "run", use ``tokenize="porter"``:
.. code-block:: python
db["articles"].enable_fts(["headline", "body"], tokenize="porter")
The SQLite documentation has more on `FTS5 tokenizers