jdupes-1.18.1/.gitignore

#
# Build ignores
#
#.*
*.o
*.o.*
*.a
*.so
*.so.*
*.1.gz

#
# Never ignore these
#
!.gitignore

#
# Normal output and testing dirs
#
/build_date.h
/jdupes
/jdupes*.exe
/jdupes-standalone
/jdupes-*-win*
/jdupes-*-mac*
/*.pkg.tar.xz
test_temp
output.log

#
# Backups / patches
#
*~
*.orig
*.rej
/*.patch

#
# debugging and editor stuff
#
core
.gdb_history
.gdbinit
.*.swp
*.gcda
*.gcno
*.gcov

# Mac OS
.DS_Store

jdupes-1.18.1/CHANGES

jdupes 1.18.1

- Fix -X newer/older on Windows by writing a local strptime() substitute

jdupes 1.18.0

- Add -X newer/older extfilters to reject files by modification date

jdupes 1.17.1

- Add basic APFS clonefile() support to -B dedupe

jdupes 1.17.0

- Rewrite dedupe code from scratch, probably fixing all dedupe bugs
- extfilter: add substring match filter for file paths
- Add -u/--printunique option to print all non-duplicates (unmatched files)
- Dedupe-blacklisted kernel version check now happens before work is done
- Build warnings fixed; report any you get (except ENABLE_DEDUPE=1 #warning)
- New build targets static and static_stripped (for static binary builds)

jdupes 1.16.0

- Add -X noext/onlyext filters to exclude/require specific file extension(s)
- Added in-depth help text for -X/--extfilter (use -X help to view)
- Clarify hard link limit behavior on Windows in program help text
- This version still has BTRFS dedupe issues and file add-by-name disabled

jdupes 1.15.0

- Disable single file addition on the command line for safety

jdupes 1.14.1

- Fix some compilation issues
- Add example shell scripts for processing piped jdupes output
- Add `stupid_dupes` educational shell script to source code
- Fix some swapped/mangled help text in program and documentation
- LOW_MEMORY compiles exclude more stuff to further reduce usage

jdupes 1.14.0

- Long option --exclude is deprecated and renamed --extfilter
- BTRFS compile options have been generalized to dedupe (see README)
- Fix a bug in 1.13.3 where many options caused an inappropriate exit

jdupes 1.13.3

- Fix the behavior of the -I/--isolate option
- Move BTRFS-specific dedupe interfaces to general Linux 4.5+ interfaces
- Change BTRFS compilation flag name (see README)
- Fix FS dedupe only working on the first 16 MiB of files
- Add FS dedupe static header for when this header is missing
- Add EXE version info for generated Windows executables
- Correct several copyright dates and comments

jdupes 1.13.2

- Fix Unicode and escaped characters in JSON output

jdupes 1.13.1

- Fix an incorrect NULL pointer check

jdupes 1.13

- Add new option -j/--json for JSON (machine-readable) output
- /usr/local is now the default PREFIX in Makefile
- Minor performance optimizations
- A few minor bug fixes

jdupes 1.12

- Small reductions in memory usage
- Add "standalone" jdupes C file which has no external requirements
- Add ability to toggle -Z with a USR1 signal (not available on Windows)
- Add -t/-no-tocttou option to disable file change safety checks

jdupes 1.11.1

- Disable build date embedding by default to make reproducible builds easier

jdupes 1.11

- Add new option -T for partial hash matches only (dangerous!)
- Fix '-P partial' printing

jdupes 1.10.4

- Fix a bug that caused -x/--xsize to fail randomly

jdupes 1.10.3

- Add -M/--printwithsummary option
- Add -0/--printnull option
- Add very long path support on Windows 10
- Do not output progress indicators if output is not a TTY
- Remove an old undocumented long option '--summary'

jdupes 1.10.2

- Add -P/--print option

jdupes 1.10.1

- Fix -I option

jdupes 1.10

- cacheinfo code not included on Windows where it is not used
- Fix -H to work properly on individual files (not just directories)
- Fix memory corruption which causes a crash when using -A option
- Block btrfs dedupe on Linux kernels < 3.0 due to possible data loss bugs
- Removed all references to 'fdupes-jody' and unused TODO file
- Add -C/--chunksize option for tuning I/O chunk size (see README)
- Make more features configurable and exclude them in LOW_MEMORY mode
- Remove HAVE_BTRFS_IOCTL_H deprecated compile-time option
- Remove experimental tree rebalance code

jdupes 1.9

- stderr on Windows is no longer polluted or empty when redirected
- Added -1/--one-file-system to restrict recursion to the same filesystem
- Added a universal exclusion stack which is currently only used for -X
- Added -X/--exclude to use exclusion stack; supersedes -x/--xsize
- More robust BTRFS enablement behavior in Makefile
- Fixed Unicode display for hard linking on Windows
- Efficiency improvements to internal memory allocator (string_malloc)
- Documentation improvements and updates
- Provide "fdupes_oneline.sh" which emulates old "fdupes -1" feature
- Single file names passed as arguments are now accepted and processed

jdupes 1.8

- All files are now licensed under The MIT License exclusively
- Fixed a serious memory alloc bug; upgrading is *strongly* recommended
- Several huge improvements to progress indicators
- Fix some error message display problems and add more error checking
- Fixes for several potential crashes and buffer overflows
- Indicate no duplicates were found if printing matches and none exist
- On Linux, jdupes now auto-tunes I/O size based on CPU L1 D-cache size
- The -v switch now also shows info about bitness in the version string

jdupes 1.7

- Incompatible change: zero-length files no longer duplicates by default
- New -z/--zeromatch option to consider zero-length files as duplicates
- I/O chunk size changed for better performance
- The PROGRAM_NAME variable is now used properly during make
- Program was re-organized into several split C files

jdupes 1.6.2

- Fix: version number shown in jdupes -v wasn't updated in 1.6.1
- Prevent BTRFS dedupe of more files than the kernel can handle
- Track directories to avoid scanning the same directory twice

jdupes 1.6.1

- Show backslash instead of forward slash as path separator on Windows
- Make BTRFS dedupe error messages more informative and less confusing
- Minor code tweaks, typo and help text fixes
- Split some functions into separate files (jdupes.c was getting large)

jdupes 1.6

- Add the -l/--linksoft option to create symbolic links from duplicates
- Disable following symlinks to directories when -s/--symlinks is used
- Reduce overall memory usage by approximately 5%
- Add configurable path buffer sizes and path buffer overflow checks
- Fixes for some build warnings seen on ARM and MIPS

jdupes 1.5.1

- Significant reduction in memory usage (with a bonus tiny speed boost)
- Improvements in string_malloc memory allocator code
- Bug fixes for output formatting inconsistencies
- Major BTRFS dedupe compilation and functionality fixes
- LOW_MEMORY compile option added for more size/speed tradeoff control

jdupes 1.5

- Invert -Z option: only "soft abort" if asked explicitly to do so
- Tweak internal data chunk size to reduce data cache misses
- Fix partial hash optimization
- Change PREFIX for building from /usr/local back to /usr

jdupes 1.4

- Add support for Unicode file paths on Windows platforms
- Discard floating point code of dubious value
- Remove -1/--sameline feature which is not practically useful
- Process partially complete duplicate scan if CTRL+C is pressed
- Add -Z/--hardabort option to disable the new CTRL+C behavior
- Add [n]one option to -d/--delete to discard all files in a match set
- Minor bug fixes and tweaks to improve behavior

jdupes 1.3

- Add -i/--reverse to invert the match sort order
- Add -I/--isolate to force cross-parameter matching
- Add "loud" debugging messages (-@ switch, build with 'make LOUD=1')

jdupes 1.2.1

- Fix a serious bug that caused some duplicates to be missed

jdupes 1.2

- Change I/O block size for improved performance
- Improved progress indicator behavior with large files; now the progress
  indicator will update more frequently when full file reads are needed
- Windows read speed boost with _O_SEQUENTIAL file flag
- Experimental tree rebalance code tuning

jdupes 1.1.1

- Fix a bug where the -r switch was always on even if not specified

jdupes 1.1

- Work around the 1023-link limit for Windows hard linking so that linking
  can continue even when the limit is reached
- Update documentation to include hard link arrow explanations
- Add "time of check to time of use" checks immediately prior to taking
  actions on files so that files which changed since being checked will
  not be touched, avoiding potential data loss on "live" data sets
- Add debug stats for files skipped due to Windows hard link limit
- Change default sort to filename instead of modification time
- Replaced Windows "get inode number" code with simpler, faster version
- Fixed a bug where an extra newline was at the end of printed matches
- Reduced progress delay interval; it was a bit slow on many large files

jdupes 1.0.2

- Update jody_hash code to latest version
- Change string_malloc to enable future string_free() improvements
- Add string_malloc counters for debug stat mode
- Add '+size' option to -x/--xsize switch to exclude files larger than the
  specified size instead of smaller than that size

jdupes 1.0.1

- Fix bug in deletion set counter that would show e.g. "Set 1 of 0"
- Minor size reductions by merging repeated fixed strings
- Add feature flag 'fastmath' to show when compiled with -ffast-math
- Corrections to code driven by -Wconversion and -Wwrite-strings

jdupes 1.0

First release. For changes before the 'jdupes' name change, see OLD_CHANGES

jdupes-1.18.1/INSTALL

Installing jdupes
-----------------------------------------------------------------------------

To install the program with the default options and flags, just issue the
following commands (note that btrfs support is off by default):

make
su root
make install

This will install the program in /usr/local/bin. You may change this to a
different location by editing the Makefile. Please refer to the Makefile for
an explanation of compile-time options. If you're having trouble compiling,
please take a look at the Makefile.
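For example, a relocated or staged install can also be done without editing
the Makefile, because the install target honors the PREFIX and DESTDIR
variables. A minimal sketch (the paths are only examples):

    make
    make install PREFIX=/opt/jdupes     # binary lands in /opt/jdupes/bin
    make install DESTDIR=/tmp/pkgroot   # staged layout for package builds

The DESTDIR form lays the files out under the staging root exactly as they
would appear on the real filesystem, which is handy when packaging.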
Various build options are available and can be turned on at compile time
by setting CFLAGS_EXTRA or by passing it to 'make':

make CFLAGS_EXTRA=-DYOUR_OPTION
make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO'

This is a list of options that can be "turned on" this way:

OMIT_GETOPT_LONG    Do not use getopt_long() C library call
ON_WINDOWS          Modify code to compile with MinGW on Windows

Certain options need to be turned on by setting a variable passed to make
instead of using CFLAGS_EXTRA, i.e. 'make DEBUG=1':

DEBUG             Turn on algorithm statistic reporting with '-D'
LOUD              '-@' for low-level debugging; enables DEBUG
ENABLE_DEDUPE     Enable '-B/--dedupe' deduplication features
STATIC_DEDUPE_H   Build dedupe support with included minimal header file
LOW_MEMORY        Build for lower memory usage instead of speed
NO_PERMS          Disable permission options and code
NO_HARDLINKS      Disable hard linking options and code
NO_SYMLINKS       Disable symbolic linking options and code
NO_USER_ORDER     Disable -I/-O options and code
NO_UNICODE        [Windows only] disable all Unicode support
EMBED_BUILD_DATE  Hard-code the build date into the binary

The LOW_MEMORY option tweaks various knobs in the program to lower total
memory usage. It also disables some features to reduce the size of certain
data structures. The improvements in memory usage are not very large, but
if you're running in a very RAM-limited environment or have a CPU with very
small caches it may be a good choice.

If you are building binaries that will have the same version number and
still need a way to differentiate the binaries, you can use EMBED_BUILD_DATE
to add the date of the build to the version info in 'jdupes -v'. Note that
you must have a build that will rebuild jdupes.c (such as after a 'make
clean') for the shown build date to actually update. This option also makes
it impossible to create reproducible builds if that's important to you.
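As a quick sketch of that workflow:

    make clean
    make EMBED_BUILD_DATE=1
    ./jdupes -v    # version string now includes the build date

The 'make clean' step forces jdupes.c to be rebuilt so the embedded date
actually refreshes rather than reusing a stale object file.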
A test directory is included so that you may familiarize yourself with the
way jdupes operates. You may test the program before installing it by
issuing a command such as "./jdupes testdir" or "./jdupes -r testdir", just
to name a couple of examples. See the README for information on valid
options.

A comparison shell script is also included. It will run your natively
installed 'jdupes' or 'jdupes' with the directories and extra options you
specify and compare the run times and output a 'diff' of the two program
outputs. Unless the core algorithm or sort behavior is changed, both
programs should produce identical outputs and the 'diff' output shouldn't
appear at all. To use it, type:

./compare_jdupes.sh [options]

A stand-alone version of jdupes that consolidates most of the program's
functionality into a single C file is included with this source code. Major
differences include reduction or elimination of some text strings, using an
embedded 32-bit jody_hash implementation instead of relying on xxHash64,
removal of all DEBUG/LOUD and Windows support code, replacement of fancy
numeric sorting with the faster but naive strcmp() sort method, and other
minor adjustments and consolidations appropriate for single-file
compilation. This version of the program is suitable for inclusion in
"Swiss army knife" projects such as BusyBox and Toybox.

The standalone version is not meant to work on Windows; it has all of the
quirks for Windows support stripped out and there's no real advantage to
using it on Windows anyway. However, if you need added stress in your life
and you understand that this is NOT SUPPORTED and YOU'RE 100% ON YOUR OWN,
you can compile it with this make command and it'll even partially work:

make standalone NO_UNICODE=1 CFLAGS_EXTRA='-DNO_PERMS -DNO_SYMLINKS -DNO_HARDLINKS'

There are some crude "package" generators included as make targets:

package      Uses chroots under /chroot to build Linux packages
winpackage   Makes Windows MinGW 32-bit and 64-bit packages (zip)
macpackage   Makes macOS 64-bit packages (zip)

jdupes-1.18.1/LICENSE

The MIT License (MIT)

Copyright (C) 2015-2020 Jody Lee Bruchon and contributors
Forked from fdupes 1.51, Copyright (C) 1999-2014 Adrian Lopez and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

jdupes-1.18.1/Makefile

# jdupes Makefile

#####################################################################
# Standard User Configuration Section                               #
#####################################################################

# PREFIX determines where files will be installed. Common examples
# include "/usr" or "/usr/local".
PREFIX = /usr/local

# Certain platforms do not support long options (command line options).
# To disable long options, uncomment the following line.
#CFLAGS += -DOMIT_GETOPT_LONG

# Uncomment for -B/--dedupe.
# This can also be enabled at build time: 'make ENABLE_DEDUPE=1'
#CFLAGS += -DENABLE_DEDUPE

# Uncomment for low memory usage at the expense of speed and features
# This can be enabled at build time: 'make LOW_MEMORY=1'
#LOW_MEMORY=1

# Uncomment this to build in hardened mode.
# This can be enabled at build time: 'make HARDEN=1'
#HARDEN=1

#####################################################################
# Developer Configuration Section                                   #
#####################################################################

# PROGRAM_NAME determines the installation name and manual page name
PROGRAM_NAME = jdupes

# BIN_DIR indicates directory where program is to be installed.
# Suggested value is "$(PREFIX)/bin"
BIN_DIR = $(PREFIX)/bin

# MAN_DIR indicates directory where the jdupes man page is to be
# installed.
# Suggested value is "$(PREFIX)/man/man1"
MAN_BASE_DIR = $(PREFIX)/share/man
MAN_DIR = $(MAN_BASE_DIR)/man1
MAN_EXT = 1

# Required External Tools
INSTALL = install	# install : UCB/GNU Install compatible
#INSTALL = ginstall
RM      = rm -f
RMDIR   = rmdir -p
MKDIR   = mkdir -p
#MKDIR   = mkdirhier
#MKDIR   = mkinstalldirs

# Make Configuration
CC ?= gcc
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Wstrict-overflow=5 -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wconversion -Wunreachable-code -Wformat=2 -Winit-self
COMPILER_OPTIONS += -std=gnu99 -O2 -g -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe
COMPILER_OPTIONS += -DSMA_MAX_FREE=11

#####################################################################
# no need to modify anything beyond this point                      #
#####################################################################

# Set built-on date for display in program version info screen
ifdef EMBED_BUILD_DATE
BD=$(shell date +"\"%Y-%m-%d %H:%M:%S %z\"")
$(shell echo "#define BUILT_ON_DATE \"$(BD)\"" > build_date.h)
COMPILER_OPTIONS += -DBUILD_DATE
endif

# Debugging code inclusion
ifdef LOUD
DEBUG=1
COMPILER_OPTIONS += -DLOUD_DEBUG
endif
ifdef DEBUG
COMPILER_OPTIONS += -DDEBUG
else
COMPILER_OPTIONS += -DNDEBUG
endif
ifdef HARDEN
COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now
endif

# Catch someone trying to enable BTRFS in flags and turn on ENABLE_DEDUPE
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS) $(CFLAGS_EXTRA)))
ENABLE_DEDUPE=1
endif
ifneq (,$(findstring DENABLE_DEDUPE,$(CFLAGS) $(CFLAGS_EXTRA)))
ENABLE_DEDUPE=1
endif

# MinGW needs this for printf() conversions to work
ifeq ($(OS), Windows_NT)
	ifndef NO_UNICODE
		UNICODE=1
		COMPILER_OPTIONS += -municode
		PROGRAM_SUFFIX=.exe
	endif
	COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1 -DON_WINDOWS=1
	OBJS += win_stat.o winres.o
	override undefine ENABLE_DEDUPE
endif

# Compatibility mappings for dedupe feature
ifdef ENABLE_BTRFS
ENABLE_DEDUPE=1
endif
ifdef STATIC_BTRFS_H
STATIC_DEDUPE_H=1
endif

# Dedupe feature (originally only BTRFS, now generalized)
ifdef ENABLE_DEDUPE
COMPILER_OPTIONS += -DENABLE_DEDUPE
OBJS += act_dedupefiles.o
else
OBJS_CLEAN += act_dedupefiles.o
endif
ifdef STATIC_DEDUPE_H
COMPILER_OPTIONS += -DSTATIC_DEDUPE_H
endif

# Low memory mode
ifdef LOW_MEMORY
COMPILER_OPTIONS += -DLOW_MEMORY -DSMA_PAGE_SIZE=32768 -DCHUNK_SIZE=16384 -DNO_HARDLINKS -DNO_SYMLINKS -DNO_USER_ORDER -DNO_PERMS
endif

CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA)

INSTALL_PROGRAM = $(INSTALL) -m 0755
INSTALL_DATA    = $(INSTALL) -m 0644

# ADDITIONAL_OBJECTS - some platforms will need additional object files
# to support features not supplied by their vendor.
# Eg: GNU getopt()
#ADDITIONAL_OBJECTS += getopt.o

OBJS += jdupes.o jody_paths.o jody_sort.o jody_win_unicode.o jody_strtoepoch.o string_malloc.o
OBJS += jody_cacheinfo.o
OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o act_printjson.o
OBJS += xxhash.o
OBJS += $(ADDITIONAL_OBJECTS)

OBJS_CLEAN += jdupes-standalone

all: $(PROGRAM_NAME)

static: $(PROGRAM_NAME)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS) -static

static_stripped: $(PROGRAM_NAME)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS) -static
	strip $(PROGRAM_NAME)

$(PROGRAM_NAME): $(OBJS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS)

winres.o: winres.rc winres.manifest.xml
	./tune_winres.sh
	windres winres.rc winres.o

standalone: jdupes-standalone

installdirs:
	test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
	test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)

install: $(PROGRAM_NAME) installdirs
	$(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
	$(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)

uninstalldirs:
	-test -e $(DESTDIR)$(BIN_DIR) && $(RMDIR) $(DESTDIR)$(BIN_DIR)
	-test -e $(DESTDIR)$(MAN_DIR) && $(RMDIR) $(DESTDIR)$(MAN_DIR)

uninstall: uninstalldirs
	$(RM) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
	$(RM) $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)

test:
	./test.sh

stripped: $(PROGRAM_NAME)
	strip $(PROGRAM_NAME)$(PROGRAM_SUFFIX)

clean:
	$(RM) $(OBJS) $(OBJS_CLEAN) build_date.h $(PROGRAM_NAME) $(PROGRAM_NAME).exe *~ *.gcno *.gcda *.gcov

distclean: clean
	$(RM) *.pkg.tar.xz
	$(RM) -r jdupes-*-win*/ jdupes-*-win*.zip
	$(RM) -r jdupes-*-mac*/ jdupes-*-mac*.zip

package:
	+./chroot_build.sh

winpackage:
	+./generate_windows_packages.sh

macpackage:
	+./generate_mac_packages.sh

jdupes-1.18.1/README.md

Introduction
--------------------------------------------------------------------------

jdupes is a program for identifying and taking actions upon duplicate files.

A WORD OF WARNING: jdupes IS NOT a drop-in compatible replacement for
fdupes! Do not blindly replace fdupes with jdupes in scripts and expect
everything to work the same way. Option availability and meanings differ
between the two programs. For example, the `-I` switch in jdupes means
"isolate" and blocks intra-argument matching, while in fdupes it means
"immediately delete files during scanning without prompting the user."

Please consider financially supporting continued development of jdupes:

https://www.subscribestar.com/JodyBruchon


v1.15+ specific: Why is the addition of single files not working?
--------------------------------------------------------------------------

If a file was added through recursion and also added explicitly, that file
would end up matching itself. This issue can be seen in v1.14.1 or older
versions that support single file addition, using a command like this in
the jdupes source code directory:

```
/usr/src/jdupes$ jdupes -rH testdir/isolate/1/ testdir/isolate/1/1.txt
testdir/isolate/1/1.txt
testdir/isolate/1/1.txt
testdir/isolate/1/2.txt
```

Even worse, using the special dot directory will make it happen without the
-H option, which is how I discovered this bug:

```
/usr/src/jdupes/testdir/isolate/1$ jdupes . 1.txt
./1.txt
./2.txt
1.txt
```

This works for any path with a single dot directory anywhere in the path,
so it has a good deal of potential for data loss in some use cases.
As such, the best option was to shove out a new minor release with this
feature turned off until some additional checking can be done, e.g. by
making sure the canonical paths aren't identical between any two files. A
future release will fix this safely.


Why use jdupes instead of the original fdupes or other duplicate finders?
--------------------------------------------------------------------------

The biggest reason is raw speed. In testing on various data sets, jdupes is
over 7 times faster than fdupes-1.51 on average.

jdupes provides a native Windows port. Most duplicate scanners built on
Linux and other UNIX-like systems do not compile for Windows out-of-the-box
and even if they do, they don't support Unicode and other Windows-specific
quirks and features.

jdupes is generally stable. All releases of jdupes are compared against
known-working reference versions of fdupes or jdupes to be certain that
output does not change. You get the benefits of an aggressive development
process without putting your data at increased risk.

Code in jdupes is written with data loss avoidance as the highest priority.
If a choice must be made between being aggressive or careful, the careful
way is always chosen.

jdupes includes features that are not always found elsewhere. Examples of
such features include block-level data deduplication and control over which
file is kept when a match set is automatically deleted. jdupes is not
afraid of dropping features of low value; a prime example is the `-1`
switch which outputs all matches in a set on one line, a feature which was
found to be useless in real-world tests and therefore thrown out.

While jdupes maintains some degree of compatibility with fdupes from which
it was originally derived, there is no guarantee that it will continue to
maintain such compatibility in the future. However, compatibility will be
retained between minor versions, i.e. jdupes-1.6 and jdupes-1.6.1 should
not have any significant differences in results with identical command
lines. If the program eats your dog or sets fire to your lawn, the authors
cannot be held responsible. If you notice a bug, please report it.


What jdupes is not: a similar (but not identical) file finding tool
--------------------------------------------------------------------------

Please note that jdupes ONLY works on 100% exact matches. It does not have
any sort of "similarity" matching, nor does it know anything about any
specific file formats such as images or sounds. Something as simple as a
change in embedded metadata such as the ID3 tags in an MP3 file or the EXIF
information in a JPEG image will not change the sound or image presented to
the user when opened, but technically it makes the file no longer identical
to the original.

Plenty of excellent tools already exist to "fuzzy match" specific file
types using knowledge of their file formats to help. There are no plans to
add this type of matching to jdupes.

There are some match options available in jdupes that enable dangerous file
matching based on partial or likely but not 100% certain matching. These
are considered expert options for special situations and are clearly and
loudly documented as being dangerous. The `-Q` and `-T` options are notable
examples, and the extreme danger of the `-T` option is safeguarded by a
requirement to specify it twice so it can't be used accidentally.
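As a sketch of that safeguard (the directory names here are only examples),
an informational partial-hash comparison has to name the option twice:

```
jdupes -T -T damaged_copy/ known_good/
```

A single `-T` on the command line errors out instead of silently enabling
the dangerous behavior.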
How can I do stuff with jdupes that isn't supported by jdupes?
--------------------------------------------------------------------------

The standard output format of jdupes is extremely simple. Match sets are
presented with one file path per line, and match sets are separated by a
blank line. This is easy to process with fairly simple shell scripts. You
can find example shell scripts in the "example scripts" directory in the
jdupes source code. The main example script, "example.sh", is easy to
modify to take basic actions on each file in a match set. These scripts are
used by piping the standard jdupes output to them:

jdupes dir1 dir2 dir3 | example.sh scriptparameters
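As a minimal sketch of such processing (the `echo` actions are placeholders
for whatever should happen to each set), a POSIX shell loop can use the
blank separator line to detect the end of each match set:

```sh
#!/bin/sh
# Feed jdupes output into a loop; a blank line ends a match set.
jdupes -r dir1 dir2 | while IFS= read -r file; do
  if [ -z "$file" ]; then
    echo "=== end of match set ==="  # act on the collected set here
  else
    echo "duplicate: $file"          # one file path per line
  fi
done
```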
Usage
--------------------------------------------------------------------------

```
Usage: jdupes [options] DIRECTORY...
```

Duplicate file sets will be printed by default unless a different action
option is specified (delete, summarize, link, dedupe, etc.)

```
 -@ --loud              output annoying low-level debug info while running
 -0 --printnull         output nulls instead of CR/LF (like 'find -print0')
 -1 --one-file-system   do not match files on different filesystems/devices
 -A --nohidden          exclude hidden files from consideration
 -B --dedupe            do a copy-on-write (reflink/clone) deduplication
 -C --chunksize=#       override I/O chunk size (min 4096, max 16777216)
 -d --delete            prompt user for files to preserve and delete all
                        others; important: under particular circumstances,
                        data may be lost when using this option together
                        with -s or --symlinks, or when specifying a
                        particular directory more than once; refer to the
                        documentation for additional information
 -D --debug             output debug statistics after completion
 -f --omitfirst         omit the first file in each set of matches
 -h --help              display this help message
 -H --hardlinks         treat any linked files as duplicate files. Normally
                        linked files are treated as non-duplicates for safety
 -i --reverse           reverse (invert) the match sort order
 -I --isolate           files in the same specified directory won't match
 -j --json              produce JSON (machine-readable) output
 -K --skiphash          skip full file hashing (may be faster; 100% safe)
 -l --linksoft          make relative symlinks for duplicates w/o prompting
 -L --linkhard          hard link all duplicate files without prompting
                        Windows allows a maximum of 1023 hard links per file
 -m --summarize         summarize dupe information
 -M --printwithsummary  will print matches and --summarize at the end
 -N --noprompt          together with --delete, preserve the first file in
                        each set of duplicates and delete the rest without
                        prompting the user
 -o --order=BY          select sort order for output, linking and deleting:
                        by mtime (BY=time) or filename (BY=name, the default)
 -O --paramorder        sort output files in order of command line parameter
                        sequence
                        Parameter order is more important than selected -o
                        sort which applies should several files share the
                        same parameter order
 -p --permissions       don't consider files with different owner/group or
                        permission bits as duplicates
 -P --print=type        print extra info (partial, early, fullhash)
 -q --quiet             hide progress indicator
 -Q --quick             skip byte-by-byte duplicate verification. WARNING:
                        this may delete non-duplicates! Read the manual first!
 -r --recurse           for every directory, process its subdirectories too
 -R --recurse:          for each directory given after this option follow
                        subdirectories encountered within (note the ':' at
                        the end of the option, manpage for more details)
 -s --symlinks          follow symlinks
 -S --size              show size of duplicate files
 -t --nochangecheck     disable security check for file changes (aka TOCTTOU)
 -T --partial-only      match based on partial hashes only. WARNING:
                        EXTREMELY DANGEROUS paired with destructive actions!
                        -T must be specified twice to work. Read the manual!
 -u --printunique       print only a list of unique (non-matched) files
 -v --version           display jdupes version and license information
 -x --xsize=SIZE        exclude files of size < SIZE bytes from consideration
    --xsize=+SIZE       '+' specified before SIZE, exclude size > SIZE
 -X --extfilter=x:y     filter files based on specified criteria
                        Use '-X help' for detailed extfilter help
 -z --zeromatch         consider zero-length files to be duplicates
 -Z --softabort         If the user aborts (i.e. CTRL-C) act on matches so far
                        You can send SIGUSR1 to the program to toggle this

Detailed help for jdupes -X/--extfilter options
General format: jdupes -X filter[:value][size_suffix]

noext:ext1[,ext2,...]    Exclude files with certain extension(s)

onlyext:ext1[,ext2,...]  Only include files with certain extension(s)

size[+-=]:size[suffix]   Exclude files meeting certain size criteria
                         Size specs: + larger, - smaller, = equal to
                         Specs can be mixed, i.e. size+=:100k will
                         exclude files 100KiB or larger in size.

nostr:text_string        Exclude all paths containing the string
onlystr:text_string      Only allow paths containing the string
                         HINT: you can use these for directories:
                         -X nostr:/dir_x/  or  -X onlystr:/dir_x/
newer:datetime           Reject files newer than the specified date
older:datetime           Reject files older than the specified date
                         Date/time format: "YYYY-MM-DD HH:MM:SS"
                         Time is optional (remember to escape spaces!)

Some filters take no value or multiple values. Filters that can take
a numeric option generally support the size multipliers K/M/G/T/P/E
with or without an added iB or B. Multipliers are binary-style unless
the B is used, which will use decimal multipliers. For example,
10k or 10kib = 10240; 10kb = 10000. Multipliers are case-insensitive.

Filters have cumulative effects: jdupes -X size+:100 -X size-:100 will
cause only files of exactly 100 bytes in size to be included.
```

For sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)
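A few example invocations of these filters (the paths and date are only
illustrative):

```
# Only consider files with a .jpg or .jpeg extension
jdupes -r -X onlyext:jpg,jpeg photos/

# Skip anything under a cache directory and anything smaller than 4 KiB
jdupes -r -X nostr:/cache/ -X size-:4096 backup/

# Reject files modified after the start of 2020
jdupes -r -X newer:2020-01-01 archive/
```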
The `-t`/`--nochangecheck` option disables file change checks during/after
scanning. This opens a security vulnerability that is called a TOCTTOU
(time of check to time of use) vulnerability. The program normally runs
checks immediately before scanning or taking action upon a file to see if
the file has changed in some way since it was last checked. With this
option enabled, the program will not run any of these checks, making the
algorithm slightly faster, but also increasing the risk that the program
scans a file, the file is changed after the scan, and the program still
acts like the file was in its previous state. This is particularly
dangerous when considering actions such as linking and deleting. In the
most extreme case, a file could be deleted during scanning but match other
files prior to that deletion; if the file is the first in the list of
duplicates and auto-delete is used, all of the remaining matched files will
be deleted as well. This option was added due to user reports of some
filesystems (particularly network filesystems) changing the reported file
information inappropriately, rendering the entire program unusable on such
filesystems.

The `-n`/`--noempty` option was removed for safety. Matching zero-length
files as duplicates now requires explicit use of the `-z`/`--zeromatch`
option instead.

Duplicate files are listed together in groups with each file displayed on a
separate line. The groups are then separated from each other by blank
lines.

The `-s`/`--symlinks` option will treat symlinked files as regular files,
but direct symlinks will be treated as if they are hard linked files and
the -H/--hardlinks option will apply to them in the same manner.

When using `-d` or `--delete`, care should be taken to ensure against
accidental data loss. While no information will be immediately lost, using
this option together with `-s` or `--symlink` can lead to confusing
information being presented to the user when prompted for files to
preserve. Specifically, a user could accidentally preserve a symlink while
deleting the file it points to. A similar problem arises when specifying a
particular directory more than once. All files within that directory will
be listed as their own duplicates, leading to data loss should a user
preserve a file without its "duplicate" (the file itself!)

Using `-1` or `--one-file-system` prevents matches that cross filesystems,
but a more relaxed form of this option may be added that allows
cross-matching for all filesystems that each parameter is present on.

`-Z` or `--softabort` used to be `--hardabort` in jdupes prior to v1.5 and
had the opposite behavior. Defaulting to taking action on abort is probably
not what most users would expect. The decision to invert rather than
reassign to a different option was made because this feature was still
fairly new at the time of the change.

On non-Windows platforms that support SIGUSR1, you can toggle the state of
the `-Z` option by sending a SIGUSR1 to the program. This is handy if you
want to abort jdupes, didn't specify `-Z`, and changed your mind and don't
want to lose all the work that was done so far. Just do
'`killall -USR1 jdupes`' and you will be able to abort with `-Z`. This
works in reverse: if you want to prevent a `-Z` from happening, a SIGUSR1
will toggle it back off. That's a lot less useful because you can just stop
and kill the program to get the same effect, but it's there if you want it
for some reason. Sending the signal twice while the program is stopped will
behave as if it was only sent once, as per normal POSIX signal behavior.

The `-O` or `--paramorder` option allows the user greater control over what
appears in the first position of a match set, specifically for keeping the
`-N` option from deleting all but one file in a set in a seemingly random
way. All directories specified on the command line will be used as the
sorting order of result sets first, followed by the sorting algorithm set
by the `-o` or `--order` option. This means that the order of all match
pairs for a single directory specification will retain the old sorting
behavior even if this option is specified. When used together with options
`-s` or `--symlink`, a user could accidentally preserve a symlink while
deleting the file it points to.
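For example (the directory names are only illustrative), combining `-O`
with `-N` and `-d` keeps the copy from the first-listed directory in each
match set:

```
# originals/ is listed first, so its files sort first in each set;
# -N -d then preserves the first file and deletes the rest unprompted.
jdupes -r -O -N -d originals/ downloads/
```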
The `-Q` or `--quick` option only reads each file once, hashes it, and
performs comparisons based solely on the hashes. There is a small but
significant risk of a hash collision which is the purpose of the failsafe
byte-for-byte comparison that this option explicitly bypasses. Do not use
it on ANY data set for which any amount of data loss is unacceptable. You
have been warned!

The `-T` or `--partial-only` option produces results based on a hash of the
first block of file data in each file, ignoring everything else in the
file. Partial hash checks have always been an important exclusion step in
the jdupes algorithm, usually hashing the first 4096 bytes of data and
allowing files that are different at the start to be rejected early. In
certain scenarios it may be a useful heuristic for a user to see that a set
of files has the same size and the same starting data, even if the
remaining data does not match; one example of this would be comparing files
with data blocks that are damaged or missing such as an incomplete file
transfer or checking a data recovery against known-good copies to see what
damaged data can be deleted in favor of restoring the known-good copy. This
option is meant to be used with informational actions and can result in
EXTREME DATA LOSS if used with options that delete files, create hard
links, or perform other destructive actions on data based on the matching
output. Because of the potential for massive data destruction, this option
MUST BE SPECIFIED TWICE to take effect and will error out if it is only
specified once.

The `-I`/`--isolate` option attempts to block matches that are contained in
the same specified directory parameter on the command line. Due to the
underlying nature of the jdupes algorithm, a lot of matches will be blocked
by this option that probably should not be. This code could use
improvement.

The `-C`/`--chunksize` option overrides the size of the I/O "chunk" used
for all file operations. Larger numbers will increase the amount of data
read at once from each file and may improve performance when scanning lots
of files that are larger than the default chunk size by reducing
"thrashing" of the hard disk heads. Smaller numbers may increase algorithm
speed depending on the characteristics of your CPU but will usually
increase I/O and system call overhead as well. The number also directly
affects memory usage: I/O chunk size is used for at least three allocations
in the program, so using a chunk size of 16777216 (16 MiB) will require 48
MiB of RAM. The default is usually between 32768 and 65536 which results in
the fastest raw speed of the algorithm and generally good all-around
performance. Feel free to experiment with the number on your data set and
report your experiences (preferably with benchmarks and info on your data
set.)
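For example (the value is only illustrative), a larger chunk size can be
tried on a data set full of big files:

```
# 1 MiB chunks; the chunk size is used for at least three buffers,
# so expect roughly 3 MiB of RAM per MiB of chunk size.
jdupes -C 1048576 -r /mnt/archive
```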
Using `-P`/`--print` will cause the program to print extra information that
may be useful but will pollute the output in a way that makes scripted
handling difficult. Its current purpose is to reveal more information about
the file matching process by printing match pairs that pass certain steps
of the process prior to full file comparison. This can be useful if you
have two files that are passing early checks but failing after full checks.


Hard and soft (symbolic) linking status symbols and behavior
--------------------------------------------------------------------------

A set of arrows are used in file linking to show what action was taken on
each link candidate. These arrows are as follows:

`---->` File was hard linked to the first file in the duplicate chain

`-@@->` File was symlinked to the first file in the chain

`-==->` Already a hard link to the first file in the chain

`-//->` File linking failed due to an error during the linking process

If your data set has linked files and you do not use `-H` to always
consider them as duplicates, you may still see linked files appear together
in match sets. This is caused by a separate file that matches with linked
files independently and is the correct behavior. See notes below on the
"triangle problem" in jdupes for technical details.


Microsoft Windows platform-specific notes
--------------------------------------------------------------------------

Windows has a hard limit of 1024 hard links per file. There is no way to
change this. The documentation for CreateHardLink() states: "The maximum
number of hard links that can be created with this function is 1023 per
file. If more than 1023 links are created for a file, an error results."
(The number is actually 1024, but they're ignoring the first file.)


The current jdupes algorithm's "triangle problem"
--------------------------------------------------------------------------

Pairs of files are excluded individually based on how the two files
compare. For example, if `--hardlinks` is not specified then two files
which are hard linked will not match one another for duplicate scanning
purposes. The problem with only examining files in pairs is that certain
circumstances will lead to the exclusion being overridden.

Let's say we have three files with identical contents:

```
a/file1
a/file2
a/file3
```

and `a/file1` is linked to `a/file3`. Here's how `jdupes a/` sees them:

---

Are 'a/file1' and 'a/file2' matches? Yes [point a/file1->duplicates to a/file2]

Are 'a/file1' and 'a/file3' matches? No (hard linked already, `-H` off)

Are 'a/file2' and 'a/file3' matches? Yes [point a/file2->duplicates to a/file3]

---

Now you have the following duplicate list:

```
a/file1->duplicates ==> a/file2->duplicates ==> a/file3
```

The solution is to split match sets into multiple sets, but doing this will
also remove the guarantee that files will only ever appear in one match set
and could result in data loss if handled improperly. In the future, options
for "greedy" and "sparse" may be introduced to switch between allowing
triangle matches to be in the same set vs. splitting sets after matching
finishes without the "only ever appears once" guarantee.


Does jdupes meet the "Good Practice when Deleting Duplicates" by rmlint?
--------------------------------------------------------------------------

Yes. If you've not read this list of cautions, it is available at
http://rmlint.readthedocs.io/en/latest/cautions.html

Here's a breakdown of how jdupes addresses each of the items listed.

### "Backup your data"/"Measure twice, cut once"

These guidelines are for the user of duplicate scanning software, not the
software itself. Back up your files regularly. Use jdupes to print a list
of what is found as duplicated and check that list very carefully before
automatically deleting the files.

### "Beware of unusual filename characters"

The only character that poses a concern in jdupes is a newline `\n` and
that is only a problem because the duplicate set printer uses them to
separate file names. Actions taken by jdupes are not parsed like a command
line, so spaces and other weird characters in names aren't a problem.
Escaping the names properly if acting on the printed output is a problem
for the user's shell script or other external program.

### "Consider safe removal options"

This is also an exercise for the user.

### "Traversal Robustness"

jdupes tracks each directory traversed by dev:inode pair to avoid adding
the contents of the same directory twice. This prevents the user from being
able to register all of their files twice by duplicating an entry on the
command line. Symlinked directories are only followed if they weren't
already followed earlier. Files are renamed to a temporary name before any
linking is done and if the link operation fails they are renamed back to
the original name.

### "Collision Robustness"

jdupes uses xxHash64 for file data hashing.
This hash is extremely fast with a low collision rate, but it still
encounters collisions as any hash function will ("secure" or otherwise) due
to the pigeonhole principle. This is why jdupes performs a full-file
verification before declaring a match. It's slower than matching by hash
only, but the pigeonhole principle puts all data sets larger than the hash
at risk of collision, meaning a false duplicate detection and data loss.
The slower completion time is not as important as data integrity. Checking
for a match based on hashes alone is irresponsible, and using secure hashes
like MD5 or the SHA families is orders of magnitude slower than xxHash64
while still suffering from the risk brought about by the pigeonholing. An
example of this problem is as follows: if you have 365 days in a year and
366 people, the chance of having at least two birthdays on the same day is
guaranteed; likewise, even though SHA512 is a 512-bit (64-byte) wide hash,
there are guaranteed to be at least 256 pairs of data streams that cause a
collision once any of the data streams being hashed for comparison is 65
bytes (520 bits) or larger.

### "Unusual Characters Robustness"

jdupes does not protect the user from putting ASCII control characters in
their file names; they will mangle the output if printed, but they can
still be operated upon by the actions (delete, link, etc.) in jdupes.

### "Seek Thrash Robustness"

jdupes uses an I/O chunk size that is optimized for reading as much as
possible from disk at once to take advantage of high sequential read speeds
in traditional rotating media drives while balancing against the
significantly higher rate of CPU cache misses triggered by an excessively
large I/O buffer size. Enlarging the I/O buffer further may allow for lots
of large files to be read with less head seeking, but the CPU cache misses
slow the algorithm down and memory usage increases to hold these large
buffers. jdupes is benchmarked periodically to make sure that the chosen
I/O chunk size is the best compromise for a wide variety of data sets.

### "Memory Usage Robustness"

This is a very subjective concern considering that even a cell phone in
someone's pocket has at least 1GB of RAM, however it still applies in the
embedded device world where 32MB of RAM might be all that you can have.
Even when processing a data set with over a million files, jdupes memory
usage (tested on Linux x86_64 with -O3 optimization) doesn't exceed 2GB. A
low memory mode can be chosen at compile time to reduce overall memory
usage with a small performance penalty.


Contact information
--------------------------------------------------------------------------

For all jdupes inquiries, contact Jody Bruchon.
Please DO NOT contact Adrian Lopez about issues with jdupes.
Legal information and software license
--------------------------------------------------------------------------

jdupes is Copyright (C) 2015-2020 by Jody Bruchon
Derived from the original 'fdupes' 1.51 (C) 1999-2014 by Adrian Lopez
Includes other code libraries which are (C) 2015-2020 by Jody Bruchon

The MIT License

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

jdupes-1.18.1/README.stupid_dupes

Introduction
--------------------------------------------------------------------------

stupid_dupes is a shell script that copies the most basic capabilities of
jdupes. It is inefficient. It barely has enough features to be worthy of
using the word "features" at all. Despite all of that, it's pretty safe and
produces the same simple match set printouts as jdupes.

This program illustrates how a duplicate scanner works on a basic level. It
has a minimal set of requirements:

* GNU bash
* find with support for -type and -maxdepth
* stat
* cat
* jodyhash (or any other program that outputs ONLY a hash)
* dd (for partial hashing)

It's slow. Real slow. You're welcome.

Please consider financially supporting continued development of
stupid_dupes (like you'd spend the money so smartly otherwise):

https://www.subscribestar.com/JodyBruchon


Contact information
--------------------------------------------------------------------------

For stupid_dupes inquiries, contact Jody Bruchon and be sure to say
something really stupid when you do.


Legal information and software license
--------------------------------------------------------------------------

stupid_dupes is Copyright (C) 2020 by Jody Bruchon and for some reason Jody
is willing to admit to writing it.

The MIT License

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

jdupes-1.18.1/act_dedupefiles.c

/* Deduplication of files with OS-specific copy-on-write mechanisms
 * This file is part of jdupes; see jdupes.c for license information */

#include "jdupes.h"

#ifdef ENABLE_DEDUPE
/* NOTE: the header names in these seven #include directives were lost in
 * extraction; they are reconstructed here from the calls the code makes
 * (printf/fprintf, calloc/free, strerror, errno, open/close, ioctl,
 * fixed-width integer types) and may not match the upstream file exactly. */
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __linux__
/* Use built-in static dedupe header if requested */
#ifdef STATIC_DEDUPE_H
#include "linux-dedupe-static.h"
#else
#include <linux/fs.h>
#endif /* STATIC_DEDUPE_H */

/* If the Linux headers are too old, automatically use the static one */
#ifndef FILE_DEDUPE_RANGE_SAME
#warning Automatically enabled STATIC_DEDUPE_H due to insufficient header support
#include "linux-dedupe-static.h"
#endif /* FILE_DEDUPE_RANGE_SAME */

#include <sys/ioctl.h>
#define JDUPES_DEDUPE_SUPPORTED 1
#endif /* __linux__ */

#ifdef __APPLE__
#ifdef NO_HARDLINKS
#error Hard link support is required for dedupe on macOS but NO_HARDLINKS was set
#endif
#include "act_linkfiles.h"
#define JDUPES_DEDUPE_SUPPORTED 1
#endif

#ifndef JDUPES_DEDUPE_SUPPORTED
#error Dedupe is only supported on Linux and macOS
#endif

#include "act_dedupefiles.h"

#define KERNEL_DEDUP_MAX_SIZE 16777216

extern void dedupefiles(file_t * restrict files)
{
#ifdef __linux__
  struct file_dedupe_range *fdr;
  struct file_dedupe_range_info *fdri;
  file_t *curfile, *curfile2, *dupefile;
  int src_fd;
  uint64_t total_files = 0;

  LOUD(fprintf(stderr, "\ndedupefiles: %p\n", files);)
  if (!files) nullptr("dedupefiles()");

  fdr = (struct file_dedupe_range *)calloc(1,
      sizeof(struct file_dedupe_range)
      + sizeof(struct file_dedupe_range_info) + 1);
  fdr->dest_count = 1;
  fdri = &fdr->info[0];
  for (curfile = files; curfile; curfile = curfile->next) {
    /* Skip all files that have no duplicates */
    if (!ISFLAG(curfile->flags, FF_HAS_DUPES)) continue;
    CLEARFLAG(curfile->flags, FF_HAS_DUPES);

    /* For each duplicate list head, handle the duplicates in the list */
    curfile2 = curfile;
    src_fd = open(curfile->d_name, O_RDWR);
    /* If an open fails, keep going down the dupe list until it is exhausted */
    while (src_fd == -1 && curfile2->duplicates && curfile2->duplicates->duplicates) {
      fprintf(stderr, "dedupe: open failed (skipping): %s\n", curfile2->d_name);
      curfile2 = curfile2->duplicates;
      src_fd = open(curfile2->d_name, O_RDWR);
    }
    if (src_fd == -1) continue;
    printf("  [SRC] %s\n", curfile2->d_name);

    /* Run dedupe for each set */
    for (dupefile = curfile->duplicates; dupefile; dupefile = dupefile->duplicates) {
      off_t remain;
      int err;

      /* Don't pass hard links to dedupe (GitHub issue #25) */
      if (dupefile->device == curfile->device && dupefile->inode == curfile->inode) {
        printf("  -==-> %s\n", dupefile->d_name);
        continue;
      }

      /* Open destination file, skipping any that fail */
      fdri->dest_fd = open(dupefile->d_name, O_RDWR);
      if (fdri->dest_fd == -1) {
        fprintf(stderr, "dedupe: open failed (skipping): %s\n", dupefile->d_name);
        continue;
      }

      /* Dedupe src <--> dest, 16 MiB or less at a time */
      remain = dupefile->size;
      fdri->status = FILE_DEDUPE_RANGE_SAME;
      /* Consume data blocks until no data remains */
      while (remain) {
        errno = 0;
        fdr->src_offset = (uint64_t)(dupefile->size - remain);
        fdri->dest_offset = fdr->src_offset;
        fdr->src_length = (uint64_t)(remain <= KERNEL_DEDUP_MAX_SIZE ? remain : KERNEL_DEDUP_MAX_SIZE);
        ioctl(src_fd, FIDEDUPERANGE, fdr);
        if (fdri->status < 0) break;
        remain -= (off_t)fdr->src_length;
      }

      /* Handle any errors */
      err = fdri->status;
      if (err != FILE_DEDUPE_RANGE_SAME || errno != 0) {
        printf("  -XX-> %s\n", dupefile->d_name);
        fprintf(stderr, "error: ");
        if (err == FILE_DEDUPE_RANGE_DIFFERS)
          fprintf(stderr, "not identical (files modified between scan and dedupe?)\n");
        else if (err != 0)
          fprintf(stderr, "%s (%d)\n", strerror(-err), err);
        else if (errno != 0)
          fprintf(stderr, "%s (%d)\n", strerror(errno), errno);
      } else {
        /* Dedupe OK; report to the user and add to file count */
        printf("  ====> %s\n", dupefile->d_name);
        total_files++;
      }
      close((int)fdri->dest_fd);
    }
    printf("\n");
    close(src_fd);
    total_files++;
  }

  if (!ISFLAG(flags, F_HIDEPROGRESS))
    fprintf(stderr, "Deduplication done (%lu files processed)\n", total_files);

  free(fdr);
#endif /* __linux__ */

/* On macOS, clonefile() is basically a "hard link" function, so linkfiles
 * will do the work. */
#ifdef __APPLE__
  linkfiles(files, 2);
#endif /* __APPLE__ */
  return;
}
#endif /* ENABLE_DEDUPE */

jdupes-1.18.1/act_dedupefiles.h

/* jdupes action for BTRFS block-level deduplication
 * This file is part of jdupes; see jdupes.c for license information */

#ifndef ACT_DEDUPEFILES_H
#define ACT_DEDUPEFILES_H

#ifdef __cplusplus
extern "C" {
#endif

#include "jdupes.h"

extern void dedupefiles(file_t * restrict files);

#ifdef __cplusplus
}
#endif

#endif /* ACT_DEDUPEFILES_H */

jdupes-1.18.1/act_deletefiles.c

/* Delete duplicate files automatically or interactively
 * This file is part of jdupes; see jdupes.c for license information */

/* NOTE: the header names in these five #include directives were lost in
 * extraction; they are reconstructed from the functions and types the code
 * uses and may not match the upstream file exactly. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "jdupes.h"
#include "jody_win_unicode.h"
#include "act_deletefiles.h"

/* For interactive deletion input */
#define INPUT_SIZE 512

#ifdef UNICODE
static wpath_t wstr;
#endif

extern void deletefiles(file_t *files, int prompt, FILE *tty)
{
  unsigned int counter, groups;
  unsigned int curgroup = 0;
  file_t *tmpfile;
  file_t **dupelist;
  unsigned int *preserve;
  char *preservestr;
  char *token;
  char *tstr;
  unsigned int number, sum, max, x;
  size_t i;

  LOUD(fprintf(stderr, "deletefiles: %p, %d, %p\n", files, prompt, tty));

  groups = get_max_dupes(files, &max, NULL);
  max++;

  dupelist = (file_t **) malloc(sizeof(file_t*) * max);
  preserve = (unsigned int *) malloc(sizeof(int) * max);
  preservestr = (char *) malloc(INPUT_SIZE);

  if (!dupelist || !preserve || !preservestr) oom("deletefiles() structures");

  for (; files; files = files->next) {
    if (ISFLAG(files->flags, FF_HAS_DUPES)) {
      curgroup++;
      counter = 1;
      dupelist[counter] = files;

      if (prompt) {
        printf("[%u] ", counter);
        fwprint(stdout, files->d_name, 1);
      }

      tmpfile = files->duplicates;

      while (tmpfile) {
        dupelist[++counter] = tmpfile;
        if (prompt) {
          printf("[%u] ", counter);
          fwprint(stdout, tmpfile->d_name, 1);
        }
        tmpfile = tmpfile->duplicates;
      }

      if (prompt) printf("\n");

      /* preserve only the first file */
      if (!prompt) {
        preserve[1] = 1;
        for (x = 2; x <= counter; x++) preserve[x] = 0;
      } else do {
        /* prompt for files to preserve */
        printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)",
          curgroup, groups, counter);
's' : ' '); printf(": "); fflush(stdout); /* treat fgets() failure as if nothing was entered */ if (!fgets(preservestr, INPUT_SIZE, tty)) preservestr[0] = '\n'; i = strlen(preservestr) - 1; /* tail of buffer must be a newline */ while (preservestr[i] != '\n') { tstr = (char *)realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE); if (!tstr) oom("deletefiles() prompt string"); preservestr = tstr; if (!fgets(preservestr + i + 1, INPUT_SIZE, tty)) { preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */ break; } i = strlen(preservestr) - 1; } for (x = 1; x <= counter; x++) preserve[x] = 0; token = strtok(preservestr, " ,\n"); if (token != NULL && (*token == 'n' || *token == 'N')) goto preserve_none; while (token != NULL) { if (*token == 'a' || *token == 'A') for (x = 0; x <= counter; x++) preserve[x] = 1; number = 0; sscanf(token, "%u", &number); if (number > 0 && number <= counter) preserve[number] = 1; token = strtok(NULL, " ,\n"); } for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x]; } while (sum < 1); /* save at least one file */ preserve_none: printf("\n"); for (x = 1; x <= counter; x++) { if (preserve[x]) { printf(" [+] "); fwprint(stdout, dupelist[x]->d_name, 1); } else { #ifdef UNICODE if (!M2W(dupelist[x]->d_name, wstr)) { printf(" [!] "); fwprint(stdout, dupelist[x]->d_name, 0); printf("-- MultiByteToWideChar failed\n"); continue; } #endif if (file_has_changed(dupelist[x])) { printf(" [!] "); fwprint(stdout, dupelist[x]->d_name, 0); printf("-- file changed since being scanned\n"); #ifdef UNICODE } else if (DeleteFileW(wstr) != 0) { #else } else if (remove(dupelist[x]->d_name) == 0) { #endif printf(" [-] "); fwprint(stdout, dupelist[x]->d_name, 1); } else { printf(" [!] "); fwprint(stdout, dupelist[x]->d_name, 0); printf("-- unable to delete file\n"); } } } printf("\n"); } } free(dupelist); free(preserve); free(preservestr); return; } jdupes-1.18.1/act_deletefiles.h000066400000000000000000000005441370142704600163700ustar00rootroot00000000000000/* jdupes action for deleting duplicate files * This file is part of jdupes; see jdupes.c for license information */ #ifndef ACT_DELETEFILES_H #define ACT_DELETEFILES_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" extern void deletefiles(file_t *files, int prompt, FILE *tty); #ifdef __cplusplus } #endif #endif /* ACT_DELETEFILES_H */ jdupes-1.18.1/act_linkfiles.c000066400000000000000000000300541370142704600160550ustar00rootroot00000000000000/* Hard link or symlink files * This file is part of jdupes; see jdupes.c for license information */ #include "jdupes.h" /* Compile out the code if no linking support is built in */ #if !(defined NO_HARDLINKS && defined NO_SYMLINKS) #include #include #include #include #include "act_linkfiles.h" #include "jody_win_unicode.h" #ifdef ON_WINDOWS #include "win_stat.h" #endif #ifdef UNICODE wpath_t wname, wname2; #endif /* Apple clonefile() is basically a hard link */ #ifdef ENABLE_DEDUPE #ifdef __APPLE__ #ifdef NO_HARDLINKS #error Hard link support is required for dedupe on macOS #endif #include #include #define ENABLE_CLONEFILE_LINK 1 #endif /* __APPLE__ */ #endif /* ENABLE_DEDUPE */ /* linktype: 0=symlink, 1=hardlink, 2=clonefile() */ extern void linkfiles(file_t *files, const int linktype) { static file_t *tmpfile; static file_t *srcfile; static file_t *curfile; static file_t ** restrict dupelist; static unsigned int counter; static unsigned int max = 0; static unsigned int x = 0; static size_t name_len = 0; static int i, success; #ifndef NO_SYMLINKS static 
unsigned int symsrc; static char rel_path[PATHBUF_SIZE]; #endif LOUD(fprintf(stderr, "linkfiles(%d): %p\n", linktype, files);) curfile = files; while (curfile) { if (ISFLAG(curfile->flags, FF_HAS_DUPES)) { counter = 1; tmpfile = curfile->duplicates; while (tmpfile) { counter++; tmpfile = tmpfile->duplicates; } if (counter > max) max = counter; } curfile = curfile->next; } max++; dupelist = (file_t**) malloc(sizeof(file_t*) * max); if (!dupelist) oom("linkfiles() dupelist"); while (files) { if (ISFLAG(files->flags, FF_HAS_DUPES)) { counter = 1; dupelist[counter] = files; tmpfile = files->duplicates; while (tmpfile) { counter++; dupelist[counter] = tmpfile; tmpfile = tmpfile->duplicates; } /* Link every file to the first file */ if (linktype) { #ifndef NO_HARDLINKS x = 2; srcfile = dupelist[1]; #else fprintf(stderr, "internal error: linkfiles(hard) called without hard link support\nPlease report this to the author as a program bug\n"); exit(EXIT_FAILURE); #endif } else { #ifndef NO_SYMLINKS x = 1; /* Symlinks should target a normal file if one exists */ srcfile = NULL; for (symsrc = 1; symsrc <= counter; symsrc++) { if (!ISFLAG(dupelist[symsrc]->flags, FF_IS_SYMLINK)) { srcfile = dupelist[symsrc]; break; } } /* If no normal file exists, abort */ if (srcfile == NULL) continue; #else fprintf(stderr, "internal error: linkfiles(soft) called without symlink support\nPlease report this to the author as a program bug\n"); exit(EXIT_FAILURE); #endif } if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("[SRC] "); fwprint(stdout, srcfile->d_name, 1); } for (; x <= counter; x++) { if (linktype == 1 || linktype == 2) { /* Can't hard link files on different devices */ if (srcfile->device != dupelist[x]->device) { fprintf(stderr, "warning: hard link target on different device, not linking:\n-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); continue; } else { /* The devices for the files are the same, but we still need to skip * anything that is already hard linked (-L and -H both set) */ if (srcfile->inode == dupelist[x]->inode) { /* Don't show == arrows when not matching against other hard links */ if (ISFLAG(flags, F_CONSIDERHARDLINKS)) if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("-==-> "); fwprint(stdout, dupelist[x]->d_name, 1); } continue; } } } else { /* Symlink prerequisite check code can go here */ /* Do not attempt to symlink a file to itself or to another symlink */ #ifndef NO_SYMLINKS if (ISFLAG(dupelist[x]->flags, FF_IS_SYMLINK) && ISFLAG(dupelist[symsrc]->flags, FF_IS_SYMLINK)) continue; if (x == symsrc) continue; #endif } #ifdef UNICODE if (!M2W(dupelist[x]->d_name, wname)) { fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, dupelist[x]->d_name, 1); continue; } #endif /* UNICODE */ /* Do not attempt to hard link files for which we don't have write access */ #ifdef ON_WINDOWS if (dupelist[x]->mode & FILE_ATTRIBUTE_READONLY) #else if (access(dupelist[x]->d_name, W_OK) != 0) #endif { fprintf(stderr, "warning: link target is a read-only file, not linking:\n-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); continue; } /* Check file pairs for modification before linking */ /* Safe linking: don't actually delete until the link succeeds */ i = file_has_changed(srcfile); if (i) { fprintf(stderr, "warning: source file modified since scanned; changing source file:\n[SRC] "); fwprint(stderr, dupelist[x]->d_name, 1); LOUD(fprintf(stderr, "file_has_changed: %d\n", i);) srcfile = dupelist[x]; continue; } if (file_has_changed(dupelist[x])) { fprintf(stderr, "warning: target file modified 
since scanned, not linking:\n-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); continue; } #ifdef ON_WINDOWS /* For Windows, the hard link count maximum is 1023 (+1); work around * by skipping linking or changing the link source file as needed */ if (STAT(srcfile->d_name, &s) != 0) { fprintf(stderr, "warning: win_stat() on source file failed, changing source file:\n[SRC] "); fwprint(stderr, dupelist[x]->d_name, 1); srcfile = dupelist[x]; continue; } if (s.st_nlink >= 1024) { fprintf(stderr, "warning: maximum source link count reached, changing source file:\n[SRC] "); srcfile = dupelist[x]; continue; } if (STAT(dupelist[x]->d_name, &s) != 0) continue; if (s.st_nlink >= 1024) { fprintf(stderr, "warning: maximum destination link count reached, skipping:\n-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); continue; } #endif /* Make sure the name will fit in the buffer before trying */ name_len = strlen(dupelist[x]->d_name) + 14; if (name_len > PATHBUF_SIZE) continue; /* Assemble a temporary file name */ strcpy(tempname, dupelist[x]->d_name); strcat(tempname, ".__jdupes__.tmp"); /* Rename the source file to the temporary name */ #ifdef UNICODE if (!M2W(tempname, wname2)) { fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, srcfile->d_name, 1); continue; } i = MoveFileW(wname, wname2) ? 0 : 1; #else i = rename(dupelist[x]->d_name, tempname); #endif if (i != 0) { fprintf(stderr, "warning: cannot move link target to a temporary name, not linking:\n-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); /* Just in case the rename succeeded yet still returned an error, roll back the rename */ #ifdef UNICODE MoveFileW(wname2, wname); #else rename(tempname, dupelist[x]->d_name); #endif continue; } /* Create the desired hard link with the original file's name */ errno = 0; success = 0; #ifdef ON_WINDOWS #ifdef UNICODE if (!M2W(srcfile->d_name, wname2)) { fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, srcfile->d_name, 1); continue; } if (CreateHardLinkW((LPCWSTR)wname, (LPCWSTR)wname2, NULL) == TRUE) success = 1; #else if (CreateHardLink(dupelist[x]->d_name, srcfile->d_name, NULL) == TRUE) success = 1; #endif #else /* ON_WINDOWS */ if (linktype == 1) { if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1; #ifdef ENABLE_CLONEFILE_LINK } else if (linktype == 2) { if (clonefile(srcfile->d_name, dupelist[x]->d_name, 0) == 0) success = 1; #endif /* ENABLE_CLONEFILE_LINK */ } #ifndef NO_SYMLINKS else { i = make_relative_link_name(srcfile->d_name, dupelist[x]->d_name, rel_path); LOUD(fprintf(stderr, "symlink GRN: %s to %s = %s\n", srcfile->d_name, dupelist[x]->d_name, rel_path)); if (i < 0) { fprintf(stderr, "warning: make_relative_link_name() failed (%d)\n", i); } else if (i == 1) { fprintf(stderr, "warning: files to be linked have the same canonical path; not linking\n"); } else if (symlink(rel_path, dupelist[x]->d_name) == 0) success = 1; } #endif /* NO_SYMLINKS */ #endif /* ON_WINDOWS */ if (success) { if (!ISFLAG(flags, F_HIDEPROGRESS)) { switch (linktype) { case 0: /* symlink */ printf("-@@-> "); break; default: case 1: /* hardlink */ printf("---->"); break; #ifdef ENABLE_CLONEFILE_LINK case 2: /* clonefile */ printf("-##-> "); break; #endif } fwprint(stdout, dupelist[x]->d_name, 1); } } else { /* The link failed. 
Warn the user and put the link target back */ if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("-//-> "); fwprint(stdout, dupelist[x]->d_name, 1); } fprintf(stderr, "warning: unable to link '"); fwprint(stderr, dupelist[x]->d_name, 0); fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0); fprintf(stderr, "': %s\n", strerror(errno)); #ifdef UNICODE if (!M2W(tempname, wname2)) { fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, tempname, 1); continue; } i = MoveFileW(wname2, wname) ? 0 : 1; #else i = rename(tempname, dupelist[x]->d_name); #endif /* UNICODE */ if (i != 0) { fprintf(stderr, "error: cannot rename temp file back to original\n"); fprintf(stderr, "original: "); fwprint(stderr, dupelist[x]->d_name, 1); fprintf(stderr, "current: "); fwprint(stderr, tempname, 1); } continue; } /* Remove temporary file to clean up; if we can't, reverse the linking */ #ifdef UNICODE if (!M2W(tempname, wname2)) { fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, tempname, 1); continue; } i = DeleteFileW(wname2) ? 0 : 1; #else i = remove(tempname); #endif /* UNICODE */ if (i != 0) { /* If the temp file can't be deleted, there may be a permissions problem * so reverse the process and warn the user */ fprintf(stderr, "\nwarning: can't delete temp file, reverting: "); fwprint(stderr, tempname, 1); #ifdef UNICODE i = DeleteFileW(wname) ? 0 : 1; #else i = remove(dupelist[x]->d_name); #endif /* This last error really should not happen, but we can't assume it won't */ if (i != 0) fprintf(stderr, "\nwarning: couldn't remove link to restore original file\n"); else { #ifdef UNICODE i = MoveFileW(wname2, wname) ? 0 : 1; #else i = rename(tempname, dupelist[x]->d_name); #endif if (i != 0) { fprintf(stderr, "\nwarning: couldn't revert the file to its original name\n"); fprintf(stderr, "original: "); fwprint(stderr, dupelist[x]->d_name, 1); fprintf(stderr, "current: "); fwprint(stderr, tempname, 1); } } } } if (!ISFLAG(flags, F_HIDEPROGRESS)) printf("\n"); } files = files->next; } free(dupelist); return; } #endif /* NO_HARDLINKS */ jdupes-1.18.1/act_linkfiles.h000066400000000000000000000005271370142704600160640ustar00rootroot00000000000000/* jdupes action for hard and soft file linking * This file is part of jdupes; see jdupes.c for license information */ #ifndef ACT_LINKFILES_H #define ACT_LINKFILES_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" extern void linkfiles(file_t *files, const int hard); #ifdef __cplusplus } #endif #endif /* ACT_LINKFILES_H */ jdupes-1.18.1/act_printjson.c000066400000000000000000000111461370142704600161240ustar00rootroot00000000000000/* Print comprehensive information to stdout in JSON format * This file is part of jdupes; see jdupes.c for license information */ #include #include #include #include #include #include "jdupes.h" #include "version.h" #include "jody_win_unicode.h" #include "act_printjson.h" #define IS_CONT(a) ((a & 0xc0) == 0x80) #define GET_CONT(a) (a & 0x3f) #define TO_HEX(a) (char)(((a) & 0x0f) <= 0x09 ? ((a) & 0x0f) + 0x30 : ((a) & 0x0f) + 0x57) #ifndef __GNUC__ #define __builtin_expect(v,e) (v) #endif #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) #if defined(__GNU__) && !defined(PATH_MAX) #define PATH_MAX 1024 #endif /** Decodes a single UTF-8 codepoint, consuming bytes. */ static inline uint32_t decode_utf8(const char * restrict * const string) { uint32_t ret = 0; /** Eat problems up silently. 
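(With assertions disabled, stray UTF-8 continuation bytes are skipped by the loop below so decoding can resynchronize.)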
*/ assert(!IS_CONT(**string)); while (unlikely(IS_CONT(**string))) (*string)++; /** ASCII. */ if (likely(!(**string & 0x80))) return (uint32_t)*(*string)++; /** Multibyte 2, 3, 4. */ if ((**string & 0xe0) == 0xc0) { ret = *(*string)++ & 0x1f; ret = (ret << 6) | GET_CONT(*(*string)++); return ret; } if ((**string & 0xf0) == 0xe0) { ret = *(*string)++ & 0x0f; ret = (ret << 6) | GET_CONT(*(*string)++); ret = (ret << 6) | GET_CONT(*(*string)++); return ret; } if ((**string & 0xf8) == 0xf0) { ret = *(*string)++ & 0x07; ret = (ret << 6) | GET_CONT(*(*string)++); ret = (ret << 6) | GET_CONT(*(*string)++); ret = (ret << 6) | GET_CONT(*(*string)++); return ret; } /** We shouldn't be here... Because 5 and 6 bytes are impossible... */ assert(0); return 0xffffffff; } /** Escapes a single UTF-16 code unit for JSON. */ static inline void escape_uni16(uint16_t u16, char ** const json) { *(*json)++ = '\\'; *(*json)++ = 'u'; *(*json)++ = TO_HEX(u16 >> 12); *(*json)++ = TO_HEX(u16 >> 8); *(*json)++ = TO_HEX(u16 >> 4); *(*json)++ = TO_HEX(u16); } /** Escapes a UTF-8 string to ASCII JSON format. */ static void json_escape(const char * restrict string, char * restrict const target) { uint32_t curr = 0; char *escaped = target; while (*string != '\0' && (escaped - target) < (PATH_MAX * 2 - 1)) { switch (*string) { case '\"': case '\\': *escaped++ = '\\'; *escaped++ = *string++; break; default: curr = decode_utf8(&string); if (curr == 0xffffffff) break; if (likely(curr < 0xffff)) { if (likely(curr < 0x20) || curr > 0xff) escape_uni16((uint16_t)curr, &escaped); else *escaped++ = (char)curr; } else { curr -= 0x10000; escape_uni16((uint16_t)(0xD800 + ((curr >> 10) & 0x03ff)), &escaped); escape_uni16((uint16_t)(0xDC00 + (curr & 0x03ff)), &escaped); } break; } } *escaped = '\0'; return; } extern void printjson(file_t * restrict files, const int argc, char **argv) { file_t * restrict tmpfile; int arg = 0, comma = 0, len = 0; char *temp = string_malloc(PATH_MAX * 2); char *temp2 = string_malloc(PATH_MAX * 2); char *temp_insert = temp; LOUD(fprintf(stderr, "printjson: %p\n", files)); /* Output information about the jdupes command environment */ printf("{\n \"jdupesVersion\": \"%s\",\n \"jdupesVersionDate\": \"%s\",\n", VER, VERDATE); printf(" \"commandLine\": \""); while (arg < argc) { len = sprintf(temp_insert, " %s", argv[arg]); assert(len >= 0); temp_insert += len; arg++; } json_escape(temp + 1, temp2); /* Skip the starting space */ printf("%s\",\n", temp2); printf(" \"extensionFlags\": \""); if (extensions[0] == NULL) printf("none\",\n"); else for (int c = 0; extensions[c] != NULL; c++) printf("%s%s", extensions[c], extensions[c+1] == NULL ? 
"\",\n" : " "); printf(" \"matchSets\": [\n"); while (files != NULL) { if (ISFLAG(files->flags, FF_HAS_DUPES)) { if (comma) printf(",\n"); printf(" {\n \"fileSize\": %" PRIdMAX ",\n \"fileList\": [\n { \"filePath\": \"", (intmax_t)files->size); sprintf(temp, "%s", files->d_name); json_escape(temp, temp2); fwprint(stdout, temp2, 0); printf("\""); tmpfile = files->duplicates; while (tmpfile != NULL) { printf(" },\n { \"filePath\": \""); sprintf(temp, "%s", tmpfile->d_name); json_escape(temp, temp2); fwprint(stdout, temp2, 0); printf("\""); tmpfile = tmpfile->duplicates; } printf(" }\n ]\n }"); comma = 1; } files = files->next; } printf("\n ]\n}\n"); string_free(temp); string_free(temp2); return; } jdupes-1.18.1/act_printjson.h000066400000000000000000000006211370142704600161250ustar00rootroot00000000000000/* jdupes action for printing comprehensive data as JSON to stdout * This file is part of jdupes; see jdupes.c for license information */ #ifndef ACT_PRINTJSON_H #define ACT_PRINTJSON_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" extern void printjson(file_t * restrict files, const int argc, char ** const restrict argv); #ifdef __cplusplus } #endif #endif /* ACT_PRINTJSON_H */ jdupes-1.18.1/act_printmatches.c000066400000000000000000000037371370142704600166060ustar00rootroot00000000000000/* Print matched file sets * This file is part of jdupes; see jdupes.c for license information */ #include #include #include #include "jdupes.h" #include "jody_win_unicode.h" #include "act_printmatches.h" extern void printmatches(file_t * restrict files) { file_t * restrict tmpfile; int printed = 0; int cr = 1; LOUD(fprintf(stderr, "printmatches: %p\n", files)); if (ISFLAG(flags, F_PRINTNULL)) cr = 2; while (files != NULL) { if (ISFLAG(files->flags, FF_HAS_DUPES)) { printed = 1; if (!ISFLAG(flags, F_OMITFIRST)) { if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size, (files->size != 1) ? 's' : ' '); fwprint(stdout, files->d_name, cr); } tmpfile = files->duplicates; while (tmpfile != NULL) { fwprint(stdout, tmpfile->d_name, cr); tmpfile = tmpfile->duplicates; } if (files->next != NULL) fwprint(stdout, "", cr); } files = files->next; } if (printed == 0) fwprint(stderr, "No duplicates found.", 1); return; } /* Print files that have no duplicates (unique files) */ extern void printunique(file_t *files) { file_t *chain, *scan; int printed = 0; int cr = 1; LOUD(fprintf(stderr, "print_uniques: %p\n", files)); if (ISFLAG(flags, F_PRINTNULL)) cr = 2; scan = files; while (scan != NULL) { if (ISFLAG(scan->flags, FF_HAS_DUPES)) { chain = scan; while (chain != NULL) { SETFLAG(chain->flags, FF_NOT_UNIQUE); chain = chain->duplicates; } } scan = scan->next; } while (files != NULL) { if (!ISFLAG(files->flags, FF_NOT_UNIQUE)) { printed = 1; if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size, (files->size != 1) ? 
's' : ' '); fwprint(stdout, files->d_name, cr); } files = files->next; } if (printed == 0) fwprint(stderr, "No unique files found.", 1); return; } jdupes-1.18.1/act_printmatches.h000066400000000000000000000006311370142704600166010ustar00rootroot00000000000000/* jdupes action for printing matched file sets to stdout * This file is part of jdupes; see jdupes.c for license information */ #ifndef ACT_PRINTMATCHES_H #define ACT_PRINTMATCHES_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" extern void printmatches(file_t * restrict files); extern void printunique(file_t * restrict files); #ifdef __cplusplus } #endif #endif /* ACT_PRINTMATCHES_H */ jdupes-1.18.1/act_summarize.c000066400000000000000000000022171370142704600161110ustar00rootroot00000000000000/* Print summary of match statistics to stdout * This file is part of jdupes; see jdupes.c for license information */ #include #include #include #include "jdupes.h" #include "act_summarize.h" extern void summarizematches(const file_t * restrict files) { unsigned int numsets = 0; off_t numbytes = 0; int numfiles = 0; LOUD(fprintf(stderr, "summarizematches: %p\n", files)); while (files != NULL) { file_t *tmpfile; if (ISFLAG(files->flags, FF_HAS_DUPES)) { numsets++; tmpfile = files->duplicates; while (tmpfile != NULL) { numfiles++; numbytes += files->size; tmpfile = tmpfile->duplicates; } } files = files->next; } if (numsets == 0) printf("No duplicates found.\n"); else { printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets); if (numbytes < 1000) printf("%" PRIdMAX " byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' '); else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000)); else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000)); } return; } jdupes-1.18.1/act_summarize.h000066400000000000000000000005571370142704600161230ustar00rootroot00000000000000/* jdupes action for printing a summary of match stats to stdout * This file is part of jdupes; see jdupes.c for license information */ #ifndef ACT_SUMMARIZE_H #define ACT_SUMMARIZE_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" extern void summarizematches(const file_t * restrict files); #ifdef __cplusplus } #endif #endif /* ACT_SUMMARIZE_H */ jdupes-1.18.1/chroot_build.sh000077500000000000000000000042201370142704600161120ustar00rootroot00000000000000#!/bin/sh # Jody's generic chroot build script # Version 1.0 ARCHES="i386 x86-64 uclibc-i386 uclibc-x86-64" test -z "$NAME" && NAME="$(basename "$(pwd)")" test -e "version.h" && VER="$(grep '#define VER ' version.h | tr -d \\\" | cut -d' ' -f3)" test -z "$VER" && VER=0 export NAME export VER export CHROOT_BASE=/chroots export WD="$(pwd)" export PKG="pkg" echo "chroot builder: building '$NAME' version '$VER'" trap clean_exit INT QUIT ABRT HUP clean_exit () { umount $CHROOT/proc $CHROOT/sys $CHROOT/tmp $CHROOT/dev $CHROOT/usr/src $CHROOT/home } do_build () { test -z "$WD" && echo "WD not set, aborting" && exit 1 test -z "$PKG" && echo "PKG not set, aborting" && exit 1 make clean if ! make -j$JOBS all then echo "Build failed"; exit 1 else echo "WD/PKG: $WD/$PKG" test -d $WD/$PKG && rm -rf $WD/$PKG mkdir $WD/$PKG make DESTDIR=$WD/$PKG install && \ tar -C pkg -c usr | xz -e > ${NAME}_$VER-$ARCH.pkg.tar.xz echo "Built ${NAME}_$VER-$ARCH.pkg.tar.xz" fi } if [ "$(id -u)" != "0" ] then echo "You must be root to auto-build chroot packages." exit 1 fi if [ "$DO_CHROOT_BUILD" = "1" ] then test -z "$1" && echo "No arch specified" && exit 1 test ! 
-d "$1" && echo "Not a directory: $1" && exit 1 cd $1 export WD="$1" do_build echo "finished: $1" exit else echo baz export DO_CHROOT_BUILD=1 for ARCH in $ARCHES do export ARCH export CHROOT="$CHROOT_BASE/$ARCH" test ! -d $CHROOT && echo "$CHROOT not present, not building $ARCH package." && continue echo "Performing package build for $CHROOT" test ! -x $CHROOT/bin/sh && echo "$CHROOT does not seem to be a chroot; aborting." && exit 1 mount --bind /dev $CHROOT/dev || clean_exit mount --bind /usr/src $CHROOT/usr/src || clean_exit mount --bind /home $CHROOT/home || clean_exit mount -t proc proc $CHROOT/proc || clean_exit mount -t sysfs sysfs $CHROOT/sys || clean_exit mount -t tmpfs tmpfs $CHROOT/tmp || clean_exit if echo "$ARCH" | grep -q "i386" then linux32 chroot $CHROOT $WD/$0 $WD else chroot $CHROOT $WD/$0 $WD fi umount $CHROOT/proc $CHROOT/sys $CHROOT/tmp $CHROOT/dev $CHROOT/usr/src $CHROOT/home test -d $WD/$PKG && rm -rf $WD/$PKG done fi jdupes-1.18.1/compare_jdupes.sh000077500000000000000000000022231370142704600164360ustar00rootroot00000000000000#!/bin/sh # Runs the installed *dupes* binary and the built binary and compares # the output for sameness. Also displays timing statistics. ERR=0 # Detect installed program type (fdupes or jdupes) if [ -z "$ORIG_DUPE" ] then ORIG_DUPE=false jdupes -v 2>/dev/null >/dev/null && ORIG_DUPE=jdupes fdupes -v 2>/dev/null >/dev/null && ORIG_DUPE=fdupes test ! -z "$WINDIR" && "$WINDIR/jdupes.exe" -v 2>/dev/null >/dev/null && ORIG_DUPE="$WINDIR/jdupes.exe" fi if [ ! $ORIG_DUPE -v 2>/dev/null >/dev/null ] then echo "Cannot run installed jdupes or fdupes" exit 1 fi test ! -e ./jdupes && echo "Build jdupes first, silly" && exit 1 echo -n "Installed $ORIG_DUPE:" sync time $ORIG_DUPE -nrq "$@" > installed_output.txt || ERR=1 echo -en "\nBuilt jdupes:" sync time ./jdupes -nrq "$@" > built_output.txt || ERR=1 diff -Nau installed_output.txt built_output.txt if [ -e jdupes-standalone ] then echo -en "\nBuilt jdupes-standalone:" sync time ./jdupes-standalone -nrq "$@" > built_output.txt || ERR=1 diff -Nau installed_output.txt built_output.txt fi rm -f installed_output.txt built_output.txt test "$ERR" != "0" && echo "Errors were returned during execution" jdupes-1.18.1/example_scripts/000077500000000000000000000000001370142704600163025ustar00rootroot00000000000000jdupes-1.18.1/example_scripts/example.sh000077500000000000000000000036571370142704600203070ustar00rootroot00000000000000#!/bin/sh # This is a shell script that demonstrates how to process the standard # jdupes output (known as "printmatches") to perform custom actions. # Use it like this: # # jdupes whatever_parameters_you_like | ./example.sh script_parameters # # If you are on Windows, jdupes uses backslash path separators which # must be converted to forward slashes before piping to this script, # and carriage returns (\r) must also be deleted from jdupes output # (tested on MSYS2 MinGW, probably true for similar environments): # # jdupes params | tr '\\' / | tr -d '\r' | ./example.sh script_params # # The general structure of jdupes pipe scripts are: # * Initialize conditions # * Iterates through a match set and act on items # * Reset conditions and restart when a blank line is reached # This script moves all duplicate files to a different directory # without duplicating the directory structure. It can be easily # modified to make the required directories and create a "mirror" # consisting of duplicates that 'jdupes -rdN' would delete. 
# Announce what this script does so the user knows what's going on echo "jdupes example script - moving duplicate files to a directory" # If first parameter isn't a valid directory, give usage info and abort test ! -d "$1" && echo "usage: $0 destination_dir_to_move_files_to" && exit 1 # Exit status will be 0 on success, 1 on any failure EXITSTATUS=0 # Skip the first file in each match set FIRSTFILE=1 while read LINE do echo "$LINE" # Reset on a blank line; next line will be a first file test -z "$LINE" && FIRSTFILE=1 && continue # If this is the first file, take no action test $FIRSTFILE -eq 1 && FIRSTFILE=0 && continue # Move the file specified on the line to the directory specified if mv -f "$LINE" "$1" then # Print the action that was taken echo "'$LINE' => '$1/$(basename "$LINE")'" else echo "Failed to move: '$LINE' => '$1/$(basename "$LINE")'" >&2 EXITSTATUS=1 fi done exit $EXITSTATUS jdupes-1.18.1/example_scripts/fdupes_oneline.sh000077500000000000000000000010151370142704600216350ustar00rootroot00000000000000#!/bin/sh # Emulates fdupes -1 output # Usage: jdupes command line | ./fdupes_oneline.sh # This is a newline. IFS=' ' if [ "$1" = "-q" ] || [ "$1" = "--shell-quote" ]; then # This only works with GNU (env printf) or bash (builtin printf). # If you are using dash, change the command to use env printf... escape() { printf '%q ' "$LINE"; } else escape() { printf '%s' "$LINE" | sed 's/\\/\\\\/g; s/ /\\ /g'; printf ' '; } fi while read -r LINE do if [ -z "$LINE" ] then printf '\n' else escape fi done jdupes-1.18.1/generate_mac_packages.sh000077500000000000000000000013661370142704600177150ustar00rootroot00000000000000#!/bin/sh # Generate Windows package folders with variant builds # Number of parallel make processes PM=12 NAME="jdupes" VER="$(cat version.h | grep '#define VER "' | cut -d\" -f2)" echo "Program version: $VER" TA=mac64 PKGNAME="${NAME}-${VER}-$TA" echo "Generating package for: $PKGNAME" mkdir -p "$PKGNAME" test ! -d "$PKGNAME" && echo "Can't create directory for package" && exit 1 cp CHANGES README.md LICENSE $PKGNAME/ make clean && make -j$PM ENABLE_DEDUPE=1 stripped && cp ${NAME} $PKGNAME/${NAME} make clean && make -j$PM ENABLE_DEDUPE=1 LOUD=1 stripped && cp ${NAME} $PKGNAME/${NAME}-loud make clean && make -j$PM LOW_MEMORY=1 stripped && cp ${NAME} $PKGNAME/${NAME}-lowmem zip -9r ${PKGNAME}.zip $PKGNAME/ echo "Package generation complete." jdupes-1.18.1/generate_windows_packages.sh000077500000000000000000000016111370142704600206400ustar00rootroot00000000000000#!/bin/sh # Generate Windows package folders with variant builds # Number of parallel make processes PM=12 NAME="jdupes" VER="$(cat version.h | grep '#define VER "' | cut -d\" -f2)" echo "Program version: $VER" TGT=$(gcc -v 2>&1 | grep Target | cut -d\ -f2- | cut -d- -f1) test "$TGT" = "i686" && TA=win32 test "$TGT" = "x86_64" && TA=win64 echo "Target architecture: $TA" PKGNAME="${NAME}-${VER}-$TA" echo "Generating package for: $PKGNAME" mkdir -p "$PKGNAME" test ! -d "$PKGNAME" && echo "Can't create directory for package" && exit 1 cp CHANGES README.md LICENSE $PKGNAME/ make clean && make -j$PM stripped && cp ${NAME}.exe $PKGNAME/${NAME}.exe make clean && make -j$PM LOUD=1 stripped && cp ${NAME}.exe $PKGNAME/${NAME}-loud.exe make clean && make -j$PM LOW_MEMORY=1 stripped && cp ${NAME}.exe $PKGNAME/${NAME}-lowmem.exe zip -9r ${PKGNAME}.zip $PKGNAME/ echo "Package generation complete." 
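[Editor's note: hedged usage sketches for the pipe scripts above; the directory
names here are hypothetical examples, not jdupes defaults.]

  # Flat one-line-per-set output, like the old "fdupes -1" behavior:
  jdupes -r ~/photos | ./example_scripts/fdupes_oneline.sh
  # Move every duplicate (keeping one copy in place) into /tmp/dupes:
  jdupes -r ~/photos | ./example_scripts/example.sh /tmp/dupes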
jdupes-1.18.1/jdupes-standalone.c000066400000000000000000002256041370142704600166750ustar00rootroot00000000000000/* jdupes (C) 2015-2020 Jody Bruchon Forked from fdupes 1.51 (C) 1999-2014 Adrian Lopez Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define VER "1.13.1" #define VERDATE "2020-06-10" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Optional FIDEDUPERANGE support */ #ifdef ENABLE_DEDUPE #ifndef __linux__ #error "Filesystem-managed deduplication only available for Linux." #endif /* __linux__ */ #include #include #ifdef STATIC_BTRFS_H /* Static BTRFS header */ #include #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_DEVICE_PATH_NAME_MAX 1024 #define FILE_DEDUPE_RANGE_DIFFERS 1 struct file_dedupe_range_info { __s64 fd; __u64 logical_offset; __u64 bytes_deduped; __s32 status; __u32 reserved; }; struct file_dedupe_range { __u64 logical_offset; __u64 length; __u16 dest_count; __u16 reserved1; __u32 reserved2; struct file_dedupe_range_info info[0]; }; #define FIDEDUPERANGE _IOWR(BTRFS_IOCTL_MAGIC, 54, struct file_dedupe_range) /* Static BTRFS header */ #else #include #endif /* STATIC_BTRFS_H */ #endif /* ENABLE_DEDUPE */ #define JODY_HASH_WIDTH 32 typedef uint32_t jodyhash_t; /* Set hash type (change this if swapping in a different hash function) */ typedef jodyhash_t jdupes_hash_t; typedef ino_t jdupes_ino_t; typedef mode_t jdupes_mode_t; #define ISFLAG(a,b) ((a & b) == b) #define SETFLAG(a,b) (a |= b) #define CLEARFLAG(a,b) (a &= (~b)) /* Behavior modification flags */ #define F_RECURSE (1U << 0) #define F_HIDEPROGRESS (1U << 1) #define F_SOFTABORT (1U << 2) #define F_FOLLOWLINKS (1U << 3) #define F_DELETEFILES (1U << 4) #define F_INCLUDEEMPTY (1U << 5) #define F_CONSIDERHARDLINKS (1U << 6) #define F_SHOWSIZE (1U << 7) #define F_OMITFIRST (1U << 8) #define F_RECURSEAFTER (1U << 9) #define F_NOPROMPT (1U << 10) #define F_SUMMARIZEMATCHES (1U << 11) #define F_EXCLUDEHIDDEN (1U << 12) #define F_PERMISSIONS (1U << 13) #define F_HARDLINKFILES (1U << 14) #define F_EXCLUDESIZE (1U << 15) #define F_QUICKCOMPARE (1U << 16) #define F_USEPARAMORDER (1U << 17) #define F_DEDUPEFILES (1U << 18) #define F_REVERSESORT (1U << 19) #define F_ISOLATE (1U << 20) #define F_MAKESYMLINKS (1U << 21) #define F_PRINTMATCHES (1U << 22) #define F_ONEFS (1U << 23) #define F_PRINTNULL (1U << 24) #define F_PARTIALONLY (1U << 25) #define F_NOCHANGECHECK (1U << 26) #define F_PRINTJSON (1U << 27) #define F_LOUD (1U << 30) #define F_DEBUG (1U << 31) /* 
Per-file true/false flags */ #define F_VALID_STAT (1U << 0) #define F_HASH_PARTIAL (1U << 1) #define F_HASH_FULL (1U << 2) #define F_HAS_DUPES (1U << 3) #define F_IS_SYMLINK (1U << 4) /* Extra print flags */ #define P_PARTIAL (1U << 0) #define P_EARLYMATCH (1U << 1) #define P_FULLHASH (1U << 2) typedef enum { ORDER_NAME = 0, ORDER_TIME } ordertype_t; /* For interactive deletion input */ #define INPUT_SIZE 512 /* Per-file information */ typedef struct _file { struct _file *duplicates; struct _file *next; char *d_name; dev_t device; jdupes_mode_t mode; off_t size; jdupes_ino_t inode; jdupes_hash_t filehash_partial; jdupes_hash_t filehash; time_t mtime; uint32_t flags; /* Status flags */ #ifndef NO_USER_ORDER unsigned int user_order; /* Order of the originating command-line parameter */ #endif #ifndef NO_HARDLINKS nlink_t nlink; #endif #ifndef NO_PERMS uid_t uid; gid_t gid; #endif } file_t; typedef struct _filetree { file_t *file; struct _filetree *left; struct _filetree *right; } filetree_t; /* -X exclusion parameter stack */ struct exclude { struct exclude *next; unsigned int flags; int64_t size; char param[]; }; /* Exclude parameter flags */ #define X_DIR 0x00000001U #define X_SIZE_EQ 0x00000002U #define X_SIZE_GT 0x00000004U #define X_SIZE_LT 0x00000008U /* The X-than-or-equal are combination flags */ #define X_SIZE_GTEQ 0x00000006U #define X_SIZE_LTEQ 0x0000000aU /* Size specifier flags */ #define XX_EXCL_SIZE 0x0000000eU /* Flags that use numeric offset instead of a string */ #define XX_EXCL_OFFSET 0x0000000eU /* Flags that require a data parameter */ #define XX_EXCL_DATA 0x0000000fU /* Exclude definition array */ struct exclude_tags { const char * const tag; const uint32_t flags; }; /* Suffix definitions (treat as case-insensitive) */ struct size_suffix { const char * const suffix; const int64_t multiplier; }; const char *FILE_MODE_RO = "rb"; const char dir_sep = '/'; /* Behavior modification flags */ static uint_fast32_t flags = 0, p_flags = 0; static const char *program_name; /* This gets used in many functions */ static struct stat s; /* Larger chunk size makes large files process faster but uses more RAM */ #ifndef CHUNK_SIZE #define CHUNK_SIZE 32768 #endif #define PARTIAL_HASH_SIZE 4096 /* Maximum path buffer size to use; must be large enough for a path plus * any work that might be done to the array it's stored in. PATH_MAX is * not always true. 
Read this article on the false promises of PATH_MAX: * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html */ #define PATHBUF_SIZE 4096 /* Size suffixes - this gets exported */ static const struct size_suffix size_suffix[] = { /* Byte (someone may actually try to use this) */ { "b", 1 }, { "k", 1024 }, { "kib", 1024 }, { "m", 1048576 }, { "mib", 1048576 }, { "g", (uint64_t)1048576 * 1024 }, { "gib", (uint64_t)1048576 * 1024 }, { "t", (uint64_t)1048576 * 1048576 }, { "tib", (uint64_t)1048576 * 1048576 }, { "p", (uint64_t)1048576 * 1048576 * 1024}, { "pib", (uint64_t)1048576 * 1048576 * 1024}, { "e", (uint64_t)1048576 * 1048576 * 1048576}, { "eib", (uint64_t)1048576 * 1048576 * 1048576}, /* Decimal suffixes */ { "kb", 1000 }, { "mb", 1000000 }, { "gb", 1000000000 }, { "tb", 1000000000000 }, { "pb", 1000000000000000 }, { "eb", 1000000000000000000 }, { NULL, 0 }, }; /* Tree to track each directory traversed */ struct travdone { struct travdone *left; struct travdone *right; jdupes_ino_t inode; dev_t device; }; static struct travdone *travdone_head = NULL; /* Exclusion tree head and static tag list */ struct exclude *exclude_head = NULL; static const struct exclude_tags exclude_tags[] = { { "dir", X_DIR }, { "size+", X_SIZE_GT }, { "size+=", X_SIZE_GTEQ }, { "size-=", X_SIZE_LTEQ }, { "size-", X_SIZE_LT }, { "size=", X_SIZE_EQ }, { NULL, 0 }, }; /* Required for progress indicator code */ static uintmax_t filecount = 0; static uintmax_t progress = 0, item_progress = 0, dupecount = 0; /* Number of read loops before checking progress indicator */ #define CHECK_MINIMUM 256 /* File tree head */ static filetree_t *checktree = NULL; /* Directory/file parameter position counter */ static unsigned int user_item_count = 1; /* registerfile() direction options */ enum tree_direction { NONE, LEFT, RIGHT }; /* Sort order reversal */ static int sort_direction = 1; /* Signal handler */ static int interrupt = 0; /* Progress indicator time */ struct timeval time1, time2; /* for temporary path mangling */ static char tempname[PATHBUF_SIZE * 2]; /***** End definitions, begin code *****/ /* Catch CTRL-C and either notify or terminate */ void sighandler(const int signum) { (void)signum; if (interrupt || !ISFLAG(flags, F_SOFTABORT)) { fprintf(stderr, "\n"); exit(EXIT_FAILURE); } interrupt = 1; return; } void sigusr1(const int signum) { (void)signum; if (!ISFLAG(flags, F_SOFTABORT)) SETFLAG(flags, F_SOFTABORT); else CLEARFLAG(flags, F_SOFTABORT); return; } /* Out of memory */ static void oom(const char * const restrict msg) { fprintf(stderr, "\nout of memory: %s\n", msg); exit(EXIT_FAILURE); } /* Null pointer failure */ static void nullptr(const char * restrict func) { static const char n[] = "(NULL)"; if (func == NULL) func = n; fprintf(stderr, "\ninternal error: NULL pointer passed to %s\n", func); exit(EXIT_FAILURE); } /* Jody Bruchon's fast hashing function * Copyright (C) 2014-2020 by Jody Bruchon * Released under The MIT License */ #define JODY_HASH_SHIFT 14 #define JODY_HASH_CONSTANT 0x1f3d5b79U static const jodyhash_t tail_mask[] = { 0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff, 0xffffffff, }; static jodyhash_t jody_block_hash(const jodyhash_t * restrict data, const jodyhash_t start_hash, const size_t count) { jodyhash_t hash = start_hash; jodyhash_t element; jodyhash_t partial_salt; size_t len; /* Don't bother trying to hash a zero-length block */ if (count == 0) return hash; len = count / sizeof(jodyhash_t); for (; len > 0; len--) { element = *data; hash += element; hash += 
JODY_HASH_CONSTANT; hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT); /* bit rotate left */ hash ^= element; hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT); hash ^= JODY_HASH_CONSTANT; hash += element; data++; } /* Handle data tail (for blocks indivisible by sizeof(jodyhash_t)) */ len = count & (sizeof(jodyhash_t) - 1); if (len) { partial_salt = JODY_HASH_CONSTANT & tail_mask[len]; element = *data & tail_mask[len]; hash += element; hash += partial_salt; hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT); hash ^= element; hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT); hash ^= partial_salt; hash += element; } return hash; } /* Compare two hashes like memcmp() */ #define HASH_COMPARE(a,b) ((a > b) ? 1:((a == b) ? 0:-1)) static inline char **cloneargs(const int argc, char **argv) { static int x; static char **args; args = (char **)malloc(sizeof(char *) * (unsigned int)argc); if (args == NULL) oom("cloneargs() start"); for (x = 0; x < argc; x++) { args[x] = (char *)malloc(strlen(argv[x]) + 1); if (args[x] == NULL) oom("cloneargs() loop"); strcpy(args[x], argv[x]); } return args; } static int findarg(const char * const arg, const int start, const int argc, char **argv) { int x; for (x = start; x < argc; x++) if (strcmp(argv[x], arg) == 0) return x; return x; } /* Find the first non-option argument after specified option. */ static int nonoptafter(const char *option, const int argc, char **oldargv, char **newargv) { int x; int targetind; int testind; int startat = 1; targetind = findarg(option, 1, argc, oldargv); for (x = optind; x < argc; x++) { testind = findarg(newargv[x], startat, argc, oldargv); if (testind > targetind) return x; else startat = testind; } return x; } /* Update progress indicator if requested */ static void update_progress(const char * const restrict msg, const int file_percent) { static int did_fpct = 0; /* The caller should be doing this anyway...but don't trust that they did */ if (ISFLAG(flags, F_HIDEPROGRESS)) return; gettimeofday(&time2, NULL); if (progress == 0 || time2.tv_sec > time1.tv_sec) { fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%", progress, filecount, dupecount, (progress * 100) / filecount); if (file_percent > -1 && msg != NULL) { fprintf(stderr, " (%s: %d%%) ", msg, file_percent); did_fpct = 1; } else if (did_fpct != 0) { fprintf(stderr, " "); did_fpct = 0; } fflush(stderr); } time1.tv_sec = time2.tv_sec; return; } /* Check file's stat() info to make sure nothing has changed * Returns 1 if changed, 0 if not changed, negative if error */ static int file_has_changed(file_t * const restrict file) { /* If -t/--nochangecheck specified then completely bypass this code */ if (ISFLAG(flags, F_NOCHANGECHECK)) return 0; if (file == NULL || file->d_name == NULL) nullptr("file_has_changed()"); if (!ISFLAG(file->flags, F_VALID_STAT)) return -66; if (stat(file->d_name, &s) != 0) return -2; if (file->inode != s.st_ino) return 1; if (file->size != s.st_size) return 1; if (file->device != s.st_dev) return 1; if (file->mtime != s.st_mtime) return 1; if (file->mode != s.st_mode) return 1; #ifndef NO_PERMS if (file->uid != s.st_uid) return 1; if (file->gid != s.st_gid) return 1; #endif #ifndef NO_SYMLINKS if (lstat(file->d_name, &s) != 0) return -3; if ((S_ISLNK(s.st_mode) > 0) ^ ISFLAG(file->flags, F_IS_SYMLINK)) return 1; #endif return 0; } static inline int 
getfilestats(file_t * const restrict file) { if (file == NULL || file->d_name == NULL) nullptr("getfilestats()"); /* Don't stat the same file more than once */ if (ISFLAG(file->flags, F_VALID_STAT)) return 0; SETFLAG(file->flags, F_VALID_STAT); if (stat(file->d_name, &s) != 0) return -1; file->inode = s.st_ino; file->size = s.st_size; file->device = s.st_dev; file->mtime = s.st_mtime; file->mode = s.st_mode; #ifndef NO_HARDLINKS file->nlink = s.st_nlink; #endif #ifndef NO_PERMS file->uid = s.st_uid; file->gid = s.st_gid; #endif #ifndef NO_SYMLINKS if (lstat(file->d_name, &s) != 0) return -1; if (S_ISLNK(s.st_mode) > 0) SETFLAG(file->flags, F_IS_SYMLINK); #endif return 0; } static void add_exclude(const char *option) { char *opt, *p; struct exclude *excl = exclude_head; const struct exclude_tags *tags = exclude_tags; const struct size_suffix *ss = size_suffix; if (option == NULL) nullptr("add_exclude()"); opt = malloc(strlen(option) + 1); if (opt == NULL) oom("add_exclude option"); strcpy(opt, option); p = opt; while (*p != ':' && *p != '\0') p++; /* Split tag string into *opt (tag) and *p (value) */ if (*p == ':') { *p = '\0'; p++; } while (tags->tag != NULL && strcmp(tags->tag, opt) != 0) tags++; if (tags->tag == NULL) goto bad_tag; /* Check for a tag that requires a value */ if (tags->flags & XX_EXCL_DATA && *p == '\0') goto spec_missing; /* *p is now at the value, NOT the tag string! */ if (exclude_head != NULL) { /* Add to end of exclusion stack if head is present */ while (excl->next != NULL) excl = excl->next; excl->next = malloc(sizeof(struct exclude) + strlen(p) + 1); if (excl->next == NULL) oom("add_exclude alloc"); excl = excl->next; } else { /* Allocate exclude_head if no exclusions exist yet */ exclude_head = malloc(sizeof(struct exclude) + strlen(p) + 1); if (exclude_head == NULL) oom("add_exclude alloc"); excl = exclude_head; } /* Set tag value from predefined tag array */ excl->flags = tags->flags; /* Initialize the new exclude element */ excl->next = NULL; if (excl->flags & XX_EXCL_OFFSET) { /* Exclude uses a number; handle it with possible suffixes */ *(excl->param) = '\0'; /* Get base size */ if (*p < '0' || *p > '9') goto bad_size_suffix; excl->size = strtoll(p, &p, 10); /* Handle suffix, if any */ if (*p != '\0') { while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++; if (ss->suffix == NULL) goto bad_size_suffix; excl->size *= ss->multiplier; } } else { /* Exclude uses string data; just copy it */ excl->size = 0; if (*p != '\0') strcpy(excl->param, p); else *(excl->param) = '\0'; } free(opt); return; spec_missing: fprintf(stderr, "Exclude spec missing or invalid: -X spec:data\n"); exit(EXIT_FAILURE); bad_tag: fprintf(stderr, "Invalid exclusion tag was specified\n"); exit(EXIT_FAILURE); bad_size_suffix: fprintf(stderr, "Invalid -X size suffix specified; use B or KMGTPE[i][B]\n"); exit(EXIT_FAILURE); } static int getdirstats(const char * const restrict name, jdupes_ino_t * const restrict inode, dev_t * const restrict dev, jdupes_mode_t * const restrict mode) { if (name == NULL || inode == NULL || dev == NULL) nullptr("getdirstats"); if (stat(name, &s) != 0) return -1; *inode = s.st_ino; *dev = s.st_dev; *mode = s.st_mode; if (!S_ISDIR(s.st_mode)) return 1; return 0; } /* Check a pair of files for match exclusion conditions * Returns: * 0 if all condition checks pass * -1 or 1 on compare result less/more * -2 on an absolute exclusion condition met * 2 on an absolute match condition met * -3 on exclusion due to isolation * -4 on exlusion due to same filesystem 
* -5 on exclusion due to permissions */ static int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2) { if (file1 == NULL || file2 == NULL || file1->d_name == NULL || file2->d_name == NULL) nullptr("check_conditions()"); /* Exclude files that are not the same size */ if (file1->size > file2->size) return -1; if (file1->size < file2->size) return 1; #ifndef NO_USER_ORDER /* Exclude based on -I/--isolate */ if (ISFLAG(flags, F_ISOLATE) && (file1->user_order == file2->user_order)) return -3; #endif /* NO_USER_ORDER */ /* Exclude based on -1/--one-file-system */ if (ISFLAG(flags, F_ONEFS) && (file1->device != file2->device)) return -4; /* Exclude files by permissions if requested */ if (ISFLAG(flags, F_PERMISSIONS) && (file1->mode != file2->mode #ifndef NO_PERMS || file1->uid != file2->uid || file1->gid != file2->gid #endif )) { return -5; } /* Hard link and symlink + '-s' check */ #ifndef NO_HARDLINKS if ((file1->inode == file2->inode) && (file1->device == file2->device)) { if (ISFLAG(flags, F_CONSIDERHARDLINKS)) return 2; else return -2; } #endif /* Fall through: all checks passed */ return 0; } /* Check for exclusion conditions for a single file (1 = fail) */ static int check_singlefile(file_t * const restrict newfile) { char * restrict tp = tempname; int excluded; if (newfile == NULL) nullptr("check_singlefile()"); /* Exclude hidden files if requested */ if (ISFLAG(flags, F_EXCLUDEHIDDEN)) { if (newfile->d_name == NULL) nullptr("check_singlefile newfile->d_name"); strcpy(tp, newfile->d_name); tp = basename(tp); if (tp[0] == '.' && strcmp(tp, ".") && strcmp(tp, "..")) return 1; } /* Get file information and check for validity */ const int i = getfilestats(newfile); if (i || newfile->size == -1) return 1; if (!S_ISDIR(newfile->mode)) { /* Exclude zero-length files if requested */ if (newfile->size == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) return 1; /* Exclude files based on exclusion stack size specs */ excluded = 0; for (struct exclude *excl = exclude_head; excl != NULL; excl = excl->next) { uint32_t sflag = excl->flags & XX_EXCL_SIZE; if ( ((sflag == X_SIZE_EQ) && (newfile->size != excl->size)) || ((sflag == X_SIZE_LTEQ) && (newfile->size <= excl->size)) || ((sflag == X_SIZE_GTEQ) && (newfile->size >= excl->size)) || ((sflag == X_SIZE_GT) && (newfile->size > excl->size)) || ((sflag == X_SIZE_LT) && (newfile->size < excl->size)) ) excluded = 1; } if (excluded) return 1; } return 0; } static file_t *init_newfile(const size_t len, file_t * restrict * const restrict filelistp) { file_t * const restrict newfile = (file_t *)malloc(sizeof(file_t)); if (!newfile) oom("init_newfile() file structure"); if (!filelistp) nullptr("init_newfile() filelistp"); memset(newfile, 0, sizeof(file_t)); newfile->d_name = (char *)malloc(len); if (!newfile->d_name) oom("init_newfile() filename"); newfile->next = *filelistp; #ifndef NO_USER_ORDER newfile->user_order = user_item_count; #endif newfile->size = -1; newfile->duplicates = NULL; return newfile; } /* Create a new traversal check object and initialize its values */ static struct travdone *travdone_alloc(const jdupes_ino_t inode, const dev_t device) { struct travdone *trav; trav = (struct travdone *)malloc(sizeof(struct travdone)); if (trav == NULL) return NULL; trav->left = NULL; trav->right = NULL; trav->inode = inode; trav->device = device; return trav; } /* De-allocate the travdone tree */ static void travdone_free(struct travdone * const restrict cur) { if (cur == NULL) return; if (cur->left != NULL) 
travdone_free(cur->left); if (cur->right != NULL) travdone_free(cur->right); free(cur); return; } /* Add a single file to the file tree */ static inline file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp) { file_t * restrict newfile; if (!name || !filelistp) nullptr("grokfile()"); /* Allocate the file_t and the d_name entries */ newfile = init_newfile(strlen(name) + 2, filelistp); strcpy(newfile->d_name, name); /* Single-file [l]stat() and exclusion condition check */ if (check_singlefile(newfile) != 0) { free(newfile->d_name); free(newfile); return NULL; } return newfile; } /* Count the following statistics: - Maximum number of files in a duplicate set (length of longest dupe chain) - Number of non-zero-length files that have duplicates (if n_files != NULL) - Total number of duplicate file sets (groups) */ static unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max, unsigned int * const restrict n_files) { unsigned int groups = 0; if (files == NULL || max == NULL) nullptr("get_max_dupes()"); *max = 0; if (n_files) *n_files = 0; while (files) { unsigned int n_dupes; if (ISFLAG(files->flags, F_HAS_DUPES)) { groups++; if (n_files && files->size) (*n_files)++; n_dupes = 1; for (file_t *curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) n_dupes++; if (n_dupes > *max) *max = n_dupes; } files = files->next; } return groups; } /* BTRFS deduplication of file blocks */ #ifdef ENABLE_DEDUPE /* Message to append to BTRFS warnings based on write permissions */ static const char *readonly_msg[] = { "", " (no write permission)" }; static char *dedupeerrstr(int err) { tempname[sizeof(tempname)-1] = '\0'; if (err == FILE_DEDUPE_RANGE_DIFFERS) { snprintf(tempname, sizeof(tempname), "FILE_DEDUPE_RANGE_DIFFERS (data modified in the meantime?)"); return tempname; } else if (err < 0) { return strerror(-err); } else { snprintf(tempname, sizeof(tempname), "Unknown error %d", err); return tempname; } } static void dedupefiles(file_t * restrict files) { struct utsname utsname; struct file_dedupe_range *same; char **dupe_filenames; /* maps to same->info indices */ file_t *curfile; unsigned int n_dupes, max_dupes, cur_info; unsigned int cur_file = 0, max_files, total_files = 0; int fd; int ret, status, readonly; /* Refuse to dedupe on 2.x kernels; they could damage user data */ if (uname(&utsname)) { fprintf(stderr, "Failed to get kernel version! Aborting.\n"); exit(EXIT_FAILURE); } if (*(utsname.release) == '2' && *(utsname.release + 1) == '.') { fprintf(stderr, "Refusing to dedupe on a 2.x kernel; data loss could occur. Aborting.\n"); exit(EXIT_FAILURE); } /* Find the largest dupe set, alloc space to hold structs for it */ get_max_dupes(files, &max_dupes, &max_files); /* Kernel dupe count is a uint16_t so exit if the type's limit is exceeded */ if (max_dupes > 65535) { fprintf(stderr, "Largest duplicate set (%d) exceeds the 65535-file dedupe limit.\n", max_dupes); fprintf(stderr, "Ask the program author to add this feature if you really need it. 
Exiting!\n"); exit(EXIT_FAILURE); } same = calloc(sizeof(struct file_dedupe_range) + sizeof(struct file_dedupe_range_info) * max_dupes, 1); dupe_filenames = malloc(max_dupes * sizeof(char *)); if (!same || !dupe_filenames) oom("dedupefiles() structures"); /* Main dedupe loop */ while (files) { if (ISFLAG(files->flags, F_HAS_DUPES) && files->size) { cur_file++; if (!ISFLAG(flags, F_HIDEPROGRESS)) { fprintf(stderr, "Dedupe [%u/%u] %u%% \r", cur_file, max_files, cur_file * 100 / max_files); } /* Open each file to be deduplicated */ cur_info = 0; for (curfile = files->duplicates; curfile; curfile = curfile->duplicates) { int errno2; /* Never allow hard links to be passed to dedupe */ if (curfile->device == files->device && curfile->inode == files->inode) continue; dupe_filenames[cur_info] = curfile->d_name; readonly = 0; if (access(curfile->d_name, W_OK) != 0) readonly = 1; fd = open(curfile->d_name, O_RDWR); /* If read-write open fails, privileged users can dedupe in read-only mode */ if (fd == -1) { /* Preserve errno in case read-only fallback fails */ errno2 = errno; fd = open(curfile->d_name, O_RDONLY); if (fd == -1) { fprintf(stderr, "Unable to open '%s': %s%s\n", curfile->d_name, strerror(errno2), readonly_msg[readonly]); continue; } } same->info[cur_info].fd = fd; same->info[cur_info].logical_offset = 0; cur_info++; total_files++; } n_dupes = cur_info; same->logical_offset = 0; same->length = (uint64_t)files->size; same->dest_count = (uint16_t)n_dupes; /* kernel type is __u16 */ fd = open(files->d_name, O_RDONLY); if (fd == -1) { fprintf(stderr, "unable to open(\"%s\", O_RDONLY): %s\n", files->d_name, strerror(errno)); goto cleanup; } /* Call dedupe ioctl to pass the files to the kernel */ ret = ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, same); if (close(fd) == -1) fprintf(stderr, "Unable to close(\"%s\"): %s\n", files->d_name, strerror(errno)); if (ret < 0) { fprintf(stderr, "dedupe failed against file '%s' (%d matches): %s\n", files->d_name, n_dupes, strerror(errno)); goto cleanup; } for (cur_info = 0; cur_info < n_dupes; cur_info++) { status = same->info[cur_info].status; if (status != 0) { if (same->info[cur_info].bytes_deduped == 0) { fprintf(stderr, "warning: dedupe failed: %s => %s: %s [%d]%s\n", files->d_name, dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]); } else { fprintf(stderr, "warning: dedupe only did %" PRIdMAX " bytes: %s => %s: %s [%d]%s\n", (intmax_t)same->info[cur_info].bytes_deduped, files->d_name, dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]); } } } cleanup: for (cur_info = 0; cur_info < n_dupes; cur_info++) { if (close((int)same->info[cur_info].fd) == -1) { fprintf(stderr, "unable to close(\"%s\"): %s", dupe_filenames[cur_info], strerror(errno)); } } } /* has dupes */ files = files->next; } if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "Deduplication done (%d files processed)\n", total_files); free(same); free(dupe_filenames); return; } #endif /* ENABLE_DEDUPE */ /* Delete duplicate files automatically or interactively */ static void deletefiles(file_t *files, int prompt, FILE *tty) { unsigned int counter, groups; unsigned int curgroup = 0; file_t *tmpfile; file_t **dupelist; unsigned int *preserve; char *preservestr; char *token; char *tstr; unsigned int number, sum, max, x; size_t i; if (!files) return; groups = get_max_dupes(files, &max, NULL); max++; dupelist = (file_t **) malloc(sizeof(file_t*) * max); preserve = (unsigned int *) malloc(sizeof(int) * max); preservestr = (char *) 
malloc(INPUT_SIZE); if (!dupelist || !preserve || !preservestr) oom("deletefiles() structures"); for (; files; files = files->next) { if (ISFLAG(files->flags, F_HAS_DUPES)) { curgroup++; counter = 1; dupelist[counter] = files; if (prompt) { printf("[%u] %s\n", counter, files->d_name); } tmpfile = files->duplicates; while (tmpfile) { dupelist[++counter] = tmpfile; if (prompt) { printf("[%u] %s\n", counter, tmpfile->d_name); } tmpfile = tmpfile->duplicates; } if (prompt) printf("\n"); /* preserve only the first file */ if (!prompt) { preserve[1] = 1; for (x = 2; x <= counter; x++) preserve[x] = 0; } else do { /* prompt for files to preserve */ printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)", curgroup, groups, counter); if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%" PRIuMAX " byte%c each)", (uintmax_t)files->size, (files->size != 1) ? 's' : ' '); printf(": "); fflush(stdout); /* treat fgets() failure as if nothing was entered */ if (!fgets(preservestr, INPUT_SIZE, tty)) preservestr[0] = '\n'; i = strlen(preservestr) - 1; /* tail of buffer must be a newline */ while (preservestr[i] != '\n') { tstr = (char *)realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE); if (!tstr) oom("deletefiles() prompt string"); preservestr = tstr; if (!fgets(preservestr + i + 1, INPUT_SIZE, tty)) { preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */ break; } i = strlen(preservestr) - 1; } for (x = 1; x <= counter; x++) preserve[x] = 0; token = strtok(preservestr, " ,\n"); if (token != NULL && (*token == 'n' || *token == 'N')) goto preserve_none; while (token != NULL) { if (*token == 'a' || *token == 'A') for (x = 0; x <= counter; x++) preserve[x] = 1; number = 0; sscanf(token, "%u", &number); if (number > 0 && number <= counter) preserve[number] = 1; token = strtok(NULL, " ,\n"); } for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x]; } while (sum < 1); /* save at least one file */ preserve_none: printf("\n"); for (x = 1; x <= counter; x++) { if (preserve[x]) { printf(" [+] %s\n", dupelist[x]->d_name); } else { if (file_has_changed(dupelist[x])) { printf(" [!] %s", dupelist[x]->d_name); printf("-- file changed since being scanned\n"); } else if (remove(dupelist[x]->d_name) == 0) { printf(" [-] %s\n", dupelist[x]->d_name); } else { printf(" [!] 
%s", dupelist[x]->d_name); printf("-- unable to delete file\n"); } } } printf("\n"); } } free(dupelist); free(preserve); free(preservestr); return; } /* Hard link or symlink files */ /* Compile out link code if no linking support is built in */ #if !(defined NO_HARDLINKS && defined NO_SYMLINKS) static void linkfiles(file_t *files, const int hard) { static file_t *tmpfile; static file_t *srcfile; static file_t *curfile; static file_t ** restrict dupelist; static unsigned int counter; static unsigned int max = 0; static unsigned int x = 0; static size_t name_len = 0; static int i, success; #ifndef NO_SYMLINKS static unsigned int symsrc; #endif curfile = files; while (curfile) { if (ISFLAG(curfile->flags, F_HAS_DUPES)) { counter = 1; tmpfile = curfile->duplicates; while (tmpfile) { counter++; tmpfile = tmpfile->duplicates; } if (counter > max) max = counter; } curfile = curfile->next; } max++; dupelist = (file_t**) malloc(sizeof(file_t*) * max); if (!dupelist) oom("linkfiles() dupelist"); while (files) { if (ISFLAG(files->flags, F_HAS_DUPES)) { counter = 1; dupelist[counter] = files; tmpfile = files->duplicates; while (tmpfile) { counter++; dupelist[counter] = tmpfile; tmpfile = tmpfile->duplicates; } /* Link every file to the first file */ if (hard) { #ifndef NO_HARDLINKS x = 2; srcfile = dupelist[1]; #endif } else { #ifndef NO_SYMLINKS x = 1; /* Symlinks should target a normal file if one exists */ srcfile = NULL; for (symsrc = 1; symsrc <= counter; symsrc++) { if (!ISFLAG(dupelist[symsrc]->flags, F_IS_SYMLINK)) { srcfile = dupelist[symsrc]; break; } } /* If no normal file exists, abort */ if (srcfile == NULL) continue; #endif } if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("[SRC] %s\n", srcfile->d_name); } for (; x <= counter; x++) { if (hard == 1) { /* Can't hard link files on different devices */ if (srcfile->device != dupelist[x]->device) { fprintf(stderr, "warning: hard link target on different device, not linking:\n-//-> %s\n", dupelist[x]->d_name); continue; } else { /* The devices for the files are the same, but we still need to skip * anything that is already hard linked (-L and -H both set) */ if (srcfile->inode == dupelist[x]->inode) { /* Don't show == arrows when not matching against other hard links */ if (ISFLAG(flags, F_CONSIDERHARDLINKS)) if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("-==-> %s\n", dupelist[x]->d_name); } continue; } } } else { /* Symlink prerequisite check code can go here */ /* Do not attempt to symlink a file to itself or to another symlink */ #ifndef NO_SYMLINKS if (ISFLAG(dupelist[x]->flags, F_IS_SYMLINK) && ISFLAG(dupelist[symsrc]->flags, F_IS_SYMLINK)) continue; if (x == symsrc) continue; #endif } /* Do not attempt to hard link files for which we don't have write access */ if (access(dupelist[x]->d_name, W_OK) != 0) { fprintf(stderr, "warning: link target is a read-only file, not linking:\n-//-> %s\n", dupelist[x]->d_name); continue; } /* Check file pairs for modification before linking */ /* Safe linking: don't actually delete until the link succeeds */ i = file_has_changed(srcfile); if (i) { fprintf(stderr, "warning: source file modified since scanned; changing source file:\n[SRC] %s\n", dupelist[x]->d_name); srcfile = dupelist[x]; continue; } if (file_has_changed(dupelist[x])) { fprintf(stderr, "warning: target file modified since scanned, not linking:\n-//-> %s\n", dupelist[x]->d_name); continue; } /* Make sure the name will fit in the buffer before trying */ name_len = strlen(dupelist[x]->d_name) + 14; if (name_len > PATHBUF_SIZE) continue; /* 
Assemble a temporary file name */ strcpy(tempname, dupelist[x]->d_name); strcat(tempname, ".__jdupes__.tmp"); /* Rename the link target to the temporary name */ i = rename(dupelist[x]->d_name, tempname); if (i != 0) { fprintf(stderr, "warning: cannot move link target to a temporary name, not linking:\n-//-> %s\n", dupelist[x]->d_name); /* Just in case the rename succeeded yet still returned an error, roll back the rename */ rename(tempname, dupelist[x]->d_name); continue; } /* Create the desired link with the original file's name. The actual link call was missing from this copy; the calls below are a reconstructed sketch: link() for hard links, symlink() otherwise (mainline jdupes builds a *relative* symlink path via a helper rather than using the source path verbatim) */ errno = 0; success = 0; if (hard) { if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1; } #ifndef NO_SYMLINKS else { if (symlink(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1; } #endif if (success) { if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("%s %s\n", hard ? "---->" : "-@@->", dupelist[x]->d_name); } } else { /* The link failed. Warn the user and put the link target back */ if (!ISFLAG(flags, F_HIDEPROGRESS)) { printf("-//-> %s\n", dupelist[x]->d_name); } fprintf(stderr, "warning: unable to link '%s' -> '%s': %s\n", dupelist[x]->d_name, srcfile->d_name, strerror(errno)); i = rename(tempname, dupelist[x]->d_name); if (i != 0) { fprintf(stderr, "error: cannot rename temp file back to original\n"); fprintf(stderr, "original: %s\n", dupelist[x]->d_name); fprintf(stderr, "current: %s\n", tempname); } continue; } /* Remove temporary file to clean up; if we can't, reverse the linking */ i = remove(tempname); if (i != 0) { /* If the temp file can't be deleted, there may be a permissions problem * so reverse the process and warn the user */ fprintf(stderr, "\nwarning: can't delete temp file, reverting: %s\n", tempname); i = remove(dupelist[x]->d_name); /* This last error really should not happen, but we can't assume it won't */ if (i != 0) fprintf(stderr, "\nwarning: couldn't remove link to restore original file\n"); else { i = rename(tempname, dupelist[x]->d_name); if (i != 0) { fprintf(stderr, "\nwarning: couldn't revert the file to its original name\n"); fprintf(stderr, "original: %s\n", dupelist[x]->d_name); fprintf(stderr, "current: %s\n", tempname); } } } } if (!ISFLAG(flags, F_HIDEPROGRESS)) printf("\n"); } files = files->next; } free(dupelist); return; } #endif /* NO_HARDLINKS && NO_SYMLINKS */ /* Print matched file sets */ static int fwprint(FILE * const restrict stream, const char * const restrict str, const int cr) { if (cr == 2) return fprintf(stream, "%s%c", str, 0); else return fprintf(stream, "%s%s", str, cr == 1 ? "\n" : ""); } /* Print comprehensive information to stdout in JSON format */ #define TO_HEX(a) (char)(((a) & 0x0f) <= 0x09 ?
(a) + 0x30 : (a) + 0x57) static void json_escape(char *string, char *escaped) { int length = 0; while (*string != '\0' && length < (PATH_MAX * 2 - 1)) { switch (*string) { case '\"': case '\\': *escaped++ = '\\'; *escaped++ = *string++; length += 2; break; default: if (!(*string & 0xe0)) { strcpy(escaped, "\\u00"); escaped += 4; *escaped++ = TO_HEX((*string >> 4)); *escaped++ = TO_HEX(*string++); length += 6; } else { *escaped++ = *string++; length++; } break; } } *escaped = '\0'; return; } extern void printjson(file_t * restrict files, const int argc, char **argv) { file_t * restrict tmpfile; int arg = 0, comma = 0; char *temp = malloc(PATH_MAX * 2); char *temp2 = malloc(PATH_MAX * 2); char *temp_insert = temp; /* Output information about the jdupes command environment */ printf("{\n \"jdupesVersion\": \"%s\",\n \"jdupesVersionDate\": \"%s\",\n", VER, VERDATE); printf(" \"commandLine\": \""); while (arg < argc) { sprintf(temp_insert, " %s", argv[arg]); temp_insert += strlen(temp_insert); arg++; } json_escape(temp, temp2); printf("%s\",\n", temp2); printf(" \"extensionFlags\": \"standalone\",\n"); printf(" \"matchSets\": [\n"); while (files != NULL) { if (ISFLAG(files->flags, F_HAS_DUPES)) { if (comma) printf(",\n"); printf(" {\n \"fileSize\": %" PRIdMAX ",\n \"fileList\": [\n { \"filePath\": \"", (intmax_t)files->size); sprintf(temp, "%s", files->d_name); json_escape(temp, temp2); fwprint(stdout, temp2, 0); printf("\""); tmpfile = files->duplicates; while (tmpfile != NULL) { printf(" },\n { \"filePath\": \""); sprintf(temp, "%s", tmpfile->d_name); json_escape(temp, temp2); fwprint(stdout, temp2, 0); printf("\""); tmpfile = tmpfile->duplicates; } printf(" }\n ]\n }"); comma = 1; } files = files->next; } printf("\n ]\n}\n"); free(temp); free(temp2); return; } static void printmatches(file_t * restrict files) { file_t * restrict tmpfile; int printed = 0; int cr = 1; if (ISFLAG(flags, F_PRINTNULL)) cr = 2; while (files != NULL) { if (ISFLAG(files->flags, F_HAS_DUPES)) { printed = 1; if (!ISFLAG(flags, F_OMITFIRST)) { if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size, (files->size != 1) ? 's' : ' '); fwprint(stdout, files->d_name, cr); } tmpfile = files->duplicates; while (tmpfile != NULL) { fwprint(stdout, tmpfile->d_name, cr); tmpfile = tmpfile->duplicates; } if (files->next != NULL) fwprint(stdout, "", cr); } files = files->next; } if (printed == 0) fwprint(stderr, "No duplicates found.", 1); return; } /* Print summary of match statistics to stdout */ static void summarizematches(const file_t * restrict files) { unsigned int numsets = 0; off_t numbytes = 0; int numfiles = 0; while (files != NULL) { file_t *tmpfile; if (ISFLAG(files->flags, F_HAS_DUPES)) { numsets++; tmpfile = files->duplicates; while (tmpfile != NULL) { numfiles++; numbytes += files->size; tmpfile = tmpfile->duplicates; } } files = files->next; } if (numsets == 0) printf("No duplicates found.\n"); else { printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets); if (numbytes < 1000) printf("%" PRIdMAX " byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 
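/* note: numbytes counts every duplicate beyond the first at its set's file size, i.e. the space reclaimable by deleting the extra copies */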
's' : ' '); else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000)); else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000)); } return; } /* Load a directory's contents into the file tree, recursing as needed */ static void grokdir(const char * const restrict dir, file_t * restrict * const restrict filelistp, int recurse) { file_t * restrict newfile; struct dirent *dirinfo; static int grokdir_level = 0; size_t dirlen; struct travdone *traverse; int i, single = 0; jdupes_ino_t inode; dev_t device, n_device; jdupes_mode_t mode; DIR *cd; if (dir == NULL || filelistp == NULL) nullptr("grokdir()"); /* Double traversal prevention tree */ i = getdirstats(dir, &inode, &device, &mode); if (i < 0) goto error_travdone; if (travdone_head == NULL) { travdone_head = travdone_alloc(inode, device); if (travdone_head == NULL) goto error_travdone; } else { traverse = travdone_head; while (1) { if (traverse == NULL) nullptr("grokdir() traverse"); /* Don't re-traverse directories we've already seen */ if (S_ISDIR(mode) && inode == traverse->inode && device == traverse->device) return; else if (inode > traverse->inode || (inode == traverse->inode && device > traverse->device)) { /* Traverse right */ if (traverse->right == NULL) { traverse->right = travdone_alloc(inode, device); if (traverse->right == NULL) goto error_travdone; break; } traverse = traverse->right; continue; } else { /* Traverse left */ if (traverse->left == NULL) { traverse->left = travdone_alloc(inode, device); if (traverse->left == NULL) goto error_travdone; break; } traverse = traverse->left; continue; } } } item_progress++; grokdir_level++; /* if dir is actually a file, just add it to the file tree */ if (i == 1) { newfile = grokfile(dir, filelistp); if (newfile == NULL) return; single = 1; goto add_single_file; } cd = opendir(dir); if (!cd) goto error_cd; while ((dirinfo = readdir(cd)) != NULL) { char * restrict tp = tempname; size_t d_name_len; if (!strcmp(dirinfo->d_name, ".") || !strcmp(dirinfo->d_name, "..")) continue; if (!ISFLAG(flags, F_HIDEPROGRESS)) { gettimeofday(&time2, NULL); if (progress == 0 || time2.tv_sec > time1.tv_sec) { fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " dirs (in %u specified)", progress, item_progress, user_item_count); } time1.tv_sec = time2.tv_sec; } /* Assemble the file's full path name, optimized to avoid strcat() */ dirlen = strlen(dir); d_name_len = strlen(dirinfo->d_name); memcpy(tp, dir, dirlen+1); if (dirlen != 0 && tp[dirlen-1] != dir_sep) { tp[dirlen] = dir_sep; dirlen++; } if (dirlen + d_name_len + 1 >= (PATHBUF_SIZE * 2)) goto error_overflow; tp += dirlen; memcpy(tp, dirinfo->d_name, d_name_len); tp += d_name_len; *tp = '\0'; d_name_len++; /* Allocate the file_t and the d_name entries */ newfile = init_newfile(dirlen + d_name_len + 2, filelistp); tp = tempname; memcpy(newfile->d_name, tp, dirlen + d_name_len); /* Single-file [l]stat() and exclusion condition check */ if (check_singlefile(newfile) != 0) { free(newfile->d_name); free(newfile); continue; } /* Optionally recurse directories, including symlinked ones if requested */ if (S_ISDIR(newfile->mode)) { if (recurse) { /* --one-file-system */ if (ISFLAG(flags, F_ONEFS) && (getdirstats(newfile->d_name, &inode, &n_device, &mode) == 0) && (device != n_device)) { free(newfile->d_name); free(newfile); continue; } #ifndef NO_SYMLINKS else if (ISFLAG(flags, F_FOLLOWLINKS) || !ISFLAG(newfile->flags, F_IS_SYMLINK)) grokdir(newfile->d_name, filelistp, recurse); #else else grokdir(newfile->d_name, 
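/* recurse into the child path assembled above */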
filelistp, recurse); #endif } free(newfile->d_name); free(newfile); continue; } else { add_single_file: /* Add regular files to list, including symlink targets if requested */ #ifndef NO_SYMLINKS if (!ISFLAG(newfile->flags, F_IS_SYMLINK) || (ISFLAG(newfile->flags, F_IS_SYMLINK) && ISFLAG(flags, F_FOLLOWLINKS))) { #else if (S_ISREG(newfile->mode)) { #endif *filelistp = newfile; filecount++; progress++; } else { free(newfile->d_name); free(newfile); if (single == 1) { single = 0; goto skip_single; } continue; } } /* Skip directory stuff if adding only a single file */ if (single == 1) { single = 0; goto skip_single; } } closedir(cd); skip_single: grokdir_level--; if (grokdir_level == 0 && !ISFLAG(flags, F_HIDEPROGRESS)) { fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " items (in %u specified)", progress, item_progress, user_item_count); } return; error_travdone: fprintf(stderr, "\ncould not stat dir %s\n", dir); return; error_cd: fprintf(stderr, "\ncould not chdir to %s\n", dir); return; error_overflow: fprintf(stderr, "\nerror: a path buffer overflowed\n"); exit(EXIT_FAILURE); } /* Use Jody Bruchon's hash function on part or all of a file */ static jdupes_hash_t *get_filehash(const file_t * const restrict checkfile, const size_t max_read) { off_t fsize; /* This is an array because we return a pointer to it */ static jdupes_hash_t hash[1]; static jdupes_hash_t *chunk = NULL; FILE *file; int check = 0; if (checkfile == NULL || checkfile->d_name == NULL) nullptr("get_filehash()"); /* Allocate on first use */ if (chunk == NULL) { chunk = (jdupes_hash_t *)malloc(CHUNK_SIZE); if (!chunk) oom("get_filehash() chunk"); } /* Get the file size. If we can't read it, bail out early */ if (checkfile->size == -1) return NULL; fsize = checkfile->size; /* Do not read more than the requested number of bytes */ if (max_read > 0 && fsize > (off_t)max_read) fsize = (off_t)max_read; /* Initialize the hash and file read parameters (with filehash_partial skipped) * * If we already hashed the first chunk of this file, we don't want to * wastefully read and hash it again, so skip the first chunk and use * the computed hash for that chunk as our starting point. */ *hash = 0; if (ISFLAG(checkfile->flags, F_HASH_PARTIAL)) { *hash = checkfile->filehash_partial; /* Don't bother going further if max_read is already fulfilled */ if (max_read != 0 && max_read <= PARTIAL_HASH_SIZE) return hash; } errno = 0; file = fopen(checkfile->d_name, FILE_MODE_RO); if (file == NULL) { fprintf(stderr, "\n%s error opening file %s\n", strerror(errno), checkfile->d_name); return NULL; } /* Actually seek past the first chunk if applicable * This is part of the filehash_partial skip optimization */ if (ISFLAG(checkfile->flags, F_HASH_PARTIAL)) { if (fseeko(file, PARTIAL_HASH_SIZE, SEEK_SET) == -1) { fclose(file); fprintf(stderr, "\nerror seeking in file %s\n", checkfile->d_name); return NULL; } fsize -= PARTIAL_HASH_SIZE; } /* Read the file in CHUNK_SIZE chunks until we've read it all. */ while (fsize > 0) { size_t bytes_to_read; if (interrupt) return 0; bytes_to_read = (fsize >= (off_t)CHUNK_SIZE) ? 
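/* read whole chunks until the remaining tail is smaller than one chunk */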
CHUNK_SIZE : (size_t)fsize; if (fread((void *)chunk, bytes_to_read, 1, file) != 1) { fprintf(stderr, "\nerror reading from file %s\n", checkfile->d_name); fclose(file); return NULL; } *hash = jody_block_hash(chunk, *hash, bytes_to_read); if ((off_t)bytes_to_read > fsize) break; else fsize -= (off_t)bytes_to_read; if (!ISFLAG(flags, F_HIDEPROGRESS)) { check++; if (check > CHECK_MINIMUM) { update_progress("hashing", (int)(((checkfile->size - fsize) * 100) / checkfile->size)); check = 0; } } } fclose(file); return hash; } static void registerfile(filetree_t * restrict * const restrict nodeptr, const enum tree_direction d, file_t * const restrict file) { filetree_t * restrict branch; if (nodeptr == NULL || file == NULL || (d != NONE && *nodeptr == NULL)) nullptr("registerfile()"); /* Allocate and initialize a new node for the file */ branch = (filetree_t *)malloc(sizeof(filetree_t)); if (branch == NULL) oom("registerfile() branch"); branch->file = file; branch->left = NULL; branch->right = NULL; /* Attach the new node to the requested branch */ switch (d) { case LEFT: (*nodeptr)->left = branch; break; case RIGHT: (*nodeptr)->right = branch; break; case NONE: /* For the root of the tree only */ *nodeptr = branch; break; default: /* This should never ever happen */ fprintf(stderr, "\ninternal error: invalid direction for registerfile(), report this\n"); exit(EXIT_FAILURE); break; } return; } /* Check two files for a match */ static file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict file) { int cmpresult = 0; int cantmatch = 0; const jdupes_hash_t * restrict filehash; if (tree == NULL || file == NULL || tree->file == NULL || tree->file->d_name == NULL || file->d_name == NULL) nullptr("checkmatch()"); /* If device and inode fields are equal one of the files is a * hard link to the other or the files have been listed twice * unintentionally. We don't want to flag these files as * duplicates unless the user specifies otherwise. */ /* If considering hard linked files as duplicates, they are * automatically duplicates without being read further since * they point to the exact same inode. If we aren't considering * hard links as duplicates, we just return NULL. 
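Past that point the exclusion ladder is: cheap checks in check_conditions(), then a PARTIAL_HASH_SIZE partial hash, then a full-file hash; the caller still verifies byte-for-byte with confirmmatch() unless -Q or -T was used.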
*/ cmpresult = check_conditions(tree->file, file); switch (cmpresult) { case 2: return &tree->file; /* linked files + -H switch */ case -2: return NULL; /* linked files, no -H switch */ case -3: /* user order */ case -4: /* one filesystem */ case -5: /* permissions */ cantmatch = 1; cmpresult = 0; break; default: break; } /* Print pre-check (early) match candidates if requested */ if (ISFLAG(p_flags, P_EARLYMATCH)) printf("Early match check passed:\n %s\n %s\n\n", file->d_name, tree->file->d_name); /* If preliminary matching succeeded, do main file data checks */ if (cmpresult == 0) { /* Attempt to exclude files quickly with partial file hashing */ if (!ISFLAG(tree->file->flags, F_HASH_PARTIAL)) { filehash = get_filehash(tree->file, PARTIAL_HASH_SIZE); if (filehash == NULL) return NULL; tree->file->filehash_partial = *filehash; SETFLAG(tree->file->flags, F_HASH_PARTIAL); } if (!ISFLAG(file->flags, F_HASH_PARTIAL)) { filehash = get_filehash(file, PARTIAL_HASH_SIZE); if (filehash == NULL) return NULL; file->filehash_partial = *filehash; SETFLAG(file->flags, F_HASH_PARTIAL); } cmpresult = HASH_COMPARE(file->filehash_partial, tree->file->filehash_partial); /* Print partial hash matching pairs if requested */ if (cmpresult == 0 && ISFLAG(p_flags, P_PARTIAL)) printf("Partial hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name); if (file->size <= PARTIAL_HASH_SIZE || ISFLAG(flags, F_PARTIALONLY)) { /* filehash_partial = filehash if file is small enough */ if (!ISFLAG(file->flags, F_HASH_FULL)) { file->filehash = file->filehash_partial; SETFLAG(file->flags, F_HASH_FULL); } if (!ISFLAG(tree->file->flags, F_HASH_FULL)) { tree->file->filehash = tree->file->filehash_partial; SETFLAG(tree->file->flags, F_HASH_FULL); } } else if (cmpresult == 0) { /* If partial match was correct, perform a full file hash match */ if (!ISFLAG(tree->file->flags, F_HASH_FULL)) { filehash = get_filehash(tree->file, 0); if (filehash == NULL) return NULL; tree->file->filehash = *filehash; SETFLAG(tree->file->flags, F_HASH_FULL); } if (!ISFLAG(file->flags, F_HASH_FULL)) { filehash = get_filehash(file, 0); if (filehash == NULL) return NULL; file->filehash = *filehash; SETFLAG(file->flags, F_HASH_FULL); } /* Full file hash comparison */ cmpresult = HASH_COMPARE(file->filehash, tree->file->filehash); } } if( (cantmatch!=0) && (cmpresult==0) ) { cmpresult = -1; } if (cmpresult < 0) { if (tree->left != NULL) { return checkmatch(tree->left, file); } else { registerfile(&tree, LEFT, file); return NULL; } } else if (cmpresult > 0) { if (tree->right != NULL) { return checkmatch(tree->right, file); } else { registerfile(&tree, RIGHT, file); return NULL; } } else { /* All compares matched */ if (ISFLAG(p_flags, P_FULLHASH)) printf("Full hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name); return &tree->file; } /* Fall through - should never be reached */ return NULL; } /* Do a byte-by-byte comparison in case two different files produce the same signature. Unlikely, but better safe than sorry. 
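This is the safety net that -Q/--quickcompare intentionally skips, trusting the hashes alone.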
*/ static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, off_t size) { static char *c1 = NULL, *c2 = NULL; static size_t r1, r2; off_t bytes = 0; int check = 0; if (file1 == NULL || file2 == NULL) nullptr("confirmmatch()"); /* Allocate on first use; OOM if either is ever NULLed */ if (!c1) { c1 = (char *)malloc(CHUNK_SIZE); c2 = (char *)malloc(CHUNK_SIZE); } if (!c1 || !c2) oom("confirmmatch() c1/c2"); fseek(file1, 0, SEEK_SET); fseek(file2, 0, SEEK_SET); do { if (interrupt) return 0; r1 = fread(c1, sizeof(char), CHUNK_SIZE, file1); r2 = fread(c2, sizeof(char), CHUNK_SIZE, file2); if (r1 != r2) return 0; /* file lengths are different */ if (memcmp (c1, c2, r1)) return 0; /* file contents are different */ if (!ISFLAG(flags, F_HIDEPROGRESS)) { check++; bytes += (off_t)r1; if (check > CHECK_MINIMUM) { update_progress("confirm", (int)((bytes * 100) / size)); check = 0; } } } while (r2); return 1; } #ifndef NO_USER_ORDER static int sort_pairs_by_param_order(file_t *f1, file_t *f2) { if (!ISFLAG(flags, F_USEPARAMORDER)) return 0; if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_param_order()"); if (f1->user_order < f2->user_order) return -sort_direction; if (f1->user_order > f2->user_order) return sort_direction; return 0; } #endif static int sort_pairs_by_mtime(file_t *f1, file_t *f2) { if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_mtime()"); #ifndef NO_USER_ORDER int po = sort_pairs_by_param_order(f1, f2); if (po != 0) return po; #endif /* NO_USER_ORDER */ if (f1->mtime < f2->mtime) return -sort_direction; else if (f1->mtime > f2->mtime) return sort_direction; return 0; } static int sort_pairs_by_filename(file_t *f1, file_t *f2) { if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_filename()"); #ifndef NO_USER_ORDER int po = sort_pairs_by_param_order(f1, f2); if (po != 0) return po; #endif /* NO_USER_ORDER */ int sc = strcmp(f1->d_name, f2->d_name); return ((sort_direction > 0) ? sc : -sc); } static void registerpair(file_t **matchlist, file_t *newmatch, int (*comparef)(file_t *f1, file_t *f2)) { file_t *traverse; file_t *back; /* NULL pointer sanity checks */ if (matchlist == NULL || newmatch == NULL || comparef == NULL) nullptr("registerpair()"); SETFLAG((*matchlist)->flags, F_HAS_DUPES); back = NULL; traverse = *matchlist; /* FIXME: This needs to be changed! As it currently stands, the compare * function only runs on a pair as it is registered and future pairs can * mess up the sort order. A separate sorting function should happen before * the dupe chain is acted upon rather than while pairs are registered. 
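The order also decides which file ends up first in a set, which actions like -N --delete (keep the first file, delete the rest) depend on.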
*/ while (traverse) { if (comparef(newmatch, traverse) <= 0) { newmatch->duplicates = traverse; if (!back) { *matchlist = newmatch; /* update pointer to head of list */ SETFLAG(newmatch->flags, F_HAS_DUPES); CLEARFLAG(traverse->flags, F_HAS_DUPES); /* flag is only for first file in dupe chain */ } else back->duplicates = newmatch; break; } else { if (traverse->duplicates == 0) { traverse->duplicates = newmatch; if (!back) SETFLAG(traverse->flags, F_HAS_DUPES); break; } } back = traverse; traverse = traverse->duplicates; } return; } static inline void help_text(void) { printf("Usage: jdupes [options] FILES and/or DIRECTORIES...\n\n"); printf("Duplicate file sets will be printed by default unless a different action\n"); printf("option is specified (delete, summarize, link, dedupe, etc.)\n"); printf(" -0 --printnull \toutput nulls instead of CR/LF (like 'find -print0')\n"); printf(" -1 --one-file-system \tdo not match files on different filesystems/devices\n"); printf(" -A --nohidden \texclude hidden files from consideration\n"); #ifdef ENABLE_DEDUPE printf(" -B --dedupe \tsend matches to filesystem for block-level deduplication\n"); #endif printf(" -d --delete \tprompt user for files to preserve and delete all\n"); printf(" \tothers; important: under particular circumstances,\n"); printf(" \tdata may be lost when using this option together\n"); printf(" \twith -s or --symlinks, or when specifying a\n"); printf(" \tparticular directory more than once; refer to the\n"); printf(" \tdocumentation for additional information\n"); printf(" -f --omitfirst \tomit the first file in each set of matches\n"); printf(" -h --help \tdisplay this help message\n"); #ifndef NO_HARDLINKS printf(" -H --hardlinks \ttreat any linked files as duplicate files. Normally\n"); printf(" \tlinked files are treated as non-duplicates for safety\n"); #endif printf(" -i --reverse \treverse (invert) the match sort order\n"); #ifndef NO_USER_ORDER printf(" -I --isolate \tfiles in the same specified directory won't match\n"); #endif printf(" -j --json \tproduce JSON (machine-readable) output\n"); #ifndef NO_SYMLINKS printf(" -l --linksoft \tmake relative symlinks for duplicates w/o prompting\n"); #endif #ifndef NO_HARDLINKS printf(" -L --linkhard \thard link all duplicate files without prompting\n"); #endif /* NO_HARDLINKS */ printf(" -m --summarize \tsummarize dupe information\n"); printf(" -M --printwithsummary\twill print matches and --summarize at the end\n"); printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n"); printf(" \teach set of duplicates and delete the rest without\n"); printf(" \tprompting the user\n"); printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n"); printf(" \tmtime (BY=time) or filename (BY=name, the default)\n"); #ifndef NO_USER_ORDER printf(" -O --paramorder \tParameter order is more important than selected -o sort\n"); #endif #ifndef NO_PERMS printf(" -p --permissions \tdon't consider files with different owner/group or\n"); printf(" \tpermission bits as duplicates\n"); #endif printf(" -P --print=type \tprint extra info (partial, early, fullhash)\n"); printf(" -q --quiet \thide progress indicator\n"); printf(" -Q --quick \tskip byte-for-byte confirmation for quick matching\n"); printf(" \tWARNING: -Q can result in data loss!
Be very careful!\n"); printf(" -r --recurse \tfor every directory, process its subdirectories too\n"); printf(" -R --recurse: \tfor each directory given after this option follow\n"); printf(" \tsubdirectories encountered within (note the ':' at\n"); printf(" \tthe end of the option, manpage for more details)\n"); #ifndef NO_SYMLINKS printf(" -s --symlinks \tfollow symlinks\n"); #endif printf(" -S --size \tshow size of duplicate files\n"); printf(" -t --nochangecheck \tdisable security check for file changes (aka TOCTTOU)\n"); printf(" -T --partial-only \tmatch based on partial hashes only. WARNING:\n"); printf(" \tEXTREMELY DANGEROUS paired with destructive actions!\n"); printf(" \t-T must be specified twice to work. Read the manual!\n"); printf(" -v --version \tdisplay jdupes version and license information\n"); printf(" -x --xsize=SIZE \texclude files of size < SIZE bytes from consideration\n"); printf(" --xsize=+SIZE \t'+' specified before SIZE, exclude size > SIZE\n"); printf(" -X --exclude=spec:info\texclude files based on specified criteria\n"); printf(" \tspecs: size+-=\n"); printf(" \tExclusions are cumulative: -X dir:abc -X dir:efg\n"); printf(" -z --zeromatch \tconsider zero-length files to be duplicates\n"); printf(" -Z --softabort \tIf the user aborts (i.e. CTRL-C) act on matches so far\n"); printf(" \tYou can send SIGUSR1 to the program to toggle this\n"); printf("\nFor sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)\n"); } int main(int argc, char **argv) { static file_t *files = NULL; static file_t *curfile; static char **oldargv; static char *xs; static int firstrecurse; static int opt; static int pm = 1; static int partialonly_spec = 0; static ordertype_t ordertype = ORDER_NAME; static const struct option long_options[] = { { "loud", 0, 0, '@' }, { "printnull", 0, 0, '0' }, { "one-file-system", 0, 0, '1' }, { "nohidden", 0, 0, 'A' }, { "dedupe", 0, 0, 'B' }, { "chunksize", 1, 0, 'C' }, { "delete", 0, 0, 'd' }, { "debug", 0, 0, 'D' }, { "omitfirst", 0, 0, 'f' }, { "help", 0, 0, 'h' }, { "hardlinks", 0, 0, 'H' }, { "reverse", 0, 0, 'i' }, { "isolate", 0, 0, 'I' }, { "json", 0, 0, 'j' }, { "linksoft", 0, 0, 'l' }, { "linkhard", 0, 0, 'L' }, { "summarize", 0, 0, 'm'}, { "printwithsummary", 0, 0, 'M'}, { "noempty", 0, 0, 'n' }, { "noprompt", 0, 0, 'N' }, { "order", 1, 0, 'o' }, { "paramorder", 0, 0, 'O' }, { "permissions", 0, 0, 'p' }, { "print", 1, 0, 'P' }, /* -P takes a required argument, matching "P:" in the short option string */ { "quiet", 0, 0, 'q' }, { "quick", 0, 0, 'Q' }, { "recurse", 0, 0, 'r' }, { "recursive", 0, 0, 'r' }, { "recurse:", 0, 0, 'R' }, { "recursive:", 0, 0, 'R' }, { "symlinks", 0, 0, 's' }, { "size", 0, 0, 'S' }, { "nochangecheck", 0, 0, 't' }, { "partial-only", 0, 0, 'T' }, { "version", 0, 0, 'v' }, { "xsize", 1, 0, 'x' }, { "exclude", 1, 0, 'X' }, { "zeromatch", 0, 0, 'z' }, { "softabort", 0, 0, 'Z' }, { NULL, 0, 0, 0 } }; /* Is stderr a terminal?
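(progress lines are drawn with carriage returns and would pollute redirected output)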
If not, we won't write progress to it */ if (!isatty(fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS); program_name = argv[0]; oldargv = cloneargs(argc, argv); while ((opt = getopt_long(argc, argv, "@01ABC:dDfhHiIjlLmMnNOpP:qQrRsStTvVzZo:x:X:", long_options, NULL)) != EOF) { switch (opt) { /* Unsupported but benign options can just be skipped */ case '@': case 'C': case 'D': break; case '0': SETFLAG(flags, F_PRINTNULL); break; case '1': SETFLAG(flags, F_ONEFS); break; case 'A': SETFLAG(flags, F_EXCLUDEHIDDEN); break; case 'd': SETFLAG(flags, F_DELETEFILES); break; case 'f': SETFLAG(flags, F_OMITFIRST); break; case 'h': help_text(); exit(EXIT_FAILURE); #ifndef NO_HARDLINKS case 'H': SETFLAG(flags, F_CONSIDERHARDLINKS); break; case 'L': SETFLAG(flags, F_HARDLINKFILES); break; #endif case 'i': SETFLAG(flags, F_REVERSESORT); break; #ifndef NO_USER_ORDER case 'I': SETFLAG(flags, F_ISOLATE); break; case 'O': SETFLAG(flags, F_USEPARAMORDER); break; #else case 'I': case 'O': fprintf(stderr, "warning: -I and -O are disabled and ignored in this build\n"); break; #endif case 'j': SETFLAG(flags, F_PRINTJSON); break; case 'm': SETFLAG(flags, F_SUMMARIZEMATCHES); break; case 'M': SETFLAG(flags, F_SUMMARIZEMATCHES); SETFLAG(flags, F_PRINTMATCHES); break; case 'n': //fprintf(stderr, "note: -n/--noempty is the default behavior now and is deprecated.\n"); break; case 'N': SETFLAG(flags, F_NOPROMPT); break; case 'p': SETFLAG(flags, F_PERMISSIONS); break; case 'P': if (strcmp(optarg, "partial") == 0) SETFLAG(p_flags, P_PARTIAL); else if (strcmp(optarg, "early") == 0) SETFLAG(p_flags, P_EARLYMATCH); else if (strcmp(optarg, "fullhash") == 0) SETFLAG(p_flags, P_FULLHASH); else { fprintf(stderr, "Option '%s' is not valid for -P\n", optarg); exit(EXIT_FAILURE); } break; case 'q': SETFLAG(flags, F_HIDEPROGRESS); break; case 'Q': SETFLAG(flags, F_QUICKCOMPARE); break; case 'r': SETFLAG(flags, F_RECURSE); break; case 'R': SETFLAG(flags, F_RECURSEAFTER); break; case 't': SETFLAG(flags, F_NOCHANGECHECK); break; case 'T': if (partialonly_spec == 0) partialonly_spec = 1; else { partialonly_spec = 2; SETFLAG(flags, F_PARTIALONLY); } break; #ifndef NO_SYMLINKS case 'l': SETFLAG(flags, F_MAKESYMLINKS); break; case 's': SETFLAG(flags, F_FOLLOWLINKS); break; #endif case 'S': SETFLAG(flags, F_SHOWSIZE); break; case 'z': SETFLAG(flags, F_INCLUDEEMPTY); break; case 'Z': SETFLAG(flags, F_SOFTABORT); break; case 'x': fprintf(stderr, "-x/--xsize is deprecated; use -X size[+-=]:size[suffix] instead\n"); xs = malloc(8 + strlen(optarg)); if (xs == NULL) oom("xsize temp string"); strcpy(xs, "size"); if (*optarg == '+') { strcat(xs, "+:"); optarg++; } else { strcat(xs, "-=:"); } strcat(xs, optarg); add_exclude(xs); free(xs); break; case 'X': add_exclude(optarg); break; case 'v': case 'V': printf("jdupes small stand-alone version (derived from v%s, %s)", VER, VERDATE); printf("\nCopyright (C) 2015-2020 by Jody Bruchon \n"); exit(EXIT_SUCCESS); case 'o': if (!strncasecmp("name", optarg, 5)) { ordertype = ORDER_NAME; } else if (!strncasecmp("time", optarg, 5)) { ordertype = ORDER_TIME; } else { fprintf(stderr, "invalid value for --order: '%s'\n", optarg); exit(EXIT_FAILURE); } break; case 'B': #ifdef ENABLE_DEDUPE SETFLAG(flags, F_DEDUPEFILES); /* btrfs will do the byte-for-byte check itself */ SETFLAG(flags, F_QUICKCOMPARE); /* It is completely useless to dedupe zero-length extents */ CLEARFLAG(flags, F_INCLUDEEMPTY); #else fprintf(stderr, "btrfs dedupe not supported\n"); exit(EXIT_FAILURE); #endif break; default: if (opt != '?') 
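/* getopt has already printed its own error message when opt is '?' */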
fprintf(stderr, "Sorry, using '-%c' is not supported in this build.\n", opt); fprintf(stderr, "Try `jdupes --help' for more information.\n"); exit(EXIT_FAILURE); } } if (optind >= argc) { fprintf(stderr, "no files or directories specified (use -h option for help)\n"); exit(EXIT_FAILURE); } if (partialonly_spec == 1) { fprintf(stderr, "--partial-only specified only once (it's VERY DANGEROUS, read the manual!)\n"); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_PARTIALONLY) && ISFLAG(flags, F_QUICKCOMPARE)) { fprintf(stderr, "--partial-only overrides --quick and is even more dangerous (read the manual!)\n"); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) { fprintf(stderr, "options --recurse and --recurse: are not compatible\n"); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { fprintf(stderr, "options --summarize and --delete are not compatible\n"); exit(EXIT_FAILURE); } #ifdef ENABLE_DEDUPE if (ISFLAG(flags, F_CONSIDERHARDLINKS) && ISFLAG(flags, F_DEDUPEFILES)) fprintf(stderr, "warning: option --dedupe overrides the behavior of --hardlinks\n"); #endif /* If pm == 0, call printmatches() */ pm = !!ISFLAG(flags, F_SUMMARIZEMATCHES) + !!ISFLAG(flags, F_DELETEFILES) + !!ISFLAG(flags, F_HARDLINKFILES) + !!ISFLAG(flags, F_MAKESYMLINKS) + !!ISFLAG(flags, F_PRINTJSON) + !!ISFLAG(flags, F_DEDUPEFILES); if (pm > 1) { fprintf(stderr, "error: only one final action may be specified.\n"); exit(EXIT_FAILURE); } if (pm == 0) SETFLAG(flags, F_PRINTMATCHES); if (ISFLAG(flags, F_RECURSEAFTER)) { firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv); if (firstrecurse == argc) firstrecurse = nonoptafter("-R", argc, oldargv, argv); if (firstrecurse == argc) { fprintf(stderr, "-R option must be isolated from other options\n"); exit(EXIT_FAILURE); } /* F_RECURSE is not set for directories before --recurse: */ for (int x = optind; x < firstrecurse; x++) { grokdir(argv[x], &files, 0); user_item_count++; } /* Set F_RECURSE for directories after --recurse: */ SETFLAG(flags, F_RECURSE); for (int x = firstrecurse; x < argc; x++) { grokdir(argv[x], &files, 1); user_item_count++; } } else { for (int x = optind; x < argc; x++) { grokdir(argv[x], &files, ISFLAG(flags, F_RECURSE)); user_item_count++; } } /* We don't need the double traversal check tree anymore */ travdone_free(travdone_head); if (ISFLAG(flags, F_REVERSESORT)) sort_direction = -1; if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\n"); if (!files) { fprintf(stderr, "No duplicates found.\n"); exit(EXIT_SUCCESS); } curfile = files; progress = 0; /* Catch CTRL-C */ signal(SIGINT, sighandler); /* Catch SIGUSR1 and use it to enable -Z */ signal(SIGUSR1, sigusr1); while (curfile) { static file_t **match = NULL; static FILE *file1; static FILE *file2; if (interrupt) { fprintf(stderr, "\nStopping file scan due to user abort\n"); if (!ISFLAG(flags, F_SOFTABORT)) exit(EXIT_FAILURE); interrupt = 0; /* reset interrupt for re-use */ goto skip_file_scan; } if (!checktree) registerfile(&checktree, NONE, curfile); else match = checkmatch(checktree, curfile); /* Byte-for-byte check that a matched pair are actually matched */ if (match != NULL) { /* Quick or partial-only compare will never run confirmmatch() * Also skip match confirmation for hard-linked files * (This set of comparisons is ugly, but quite efficient) */ if (ISFLAG(flags, F_QUICKCOMPARE) || ISFLAG(flags, F_PARTIALONLY) || (ISFLAG(flags, F_CONSIDERHARDLINKS) && (curfile->inode == (*match)->inode) && (curfile->device == 
(*match)->device)) ) { registerpair(match, curfile, (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename); dupecount++; goto skip_full_check; } file1 = fopen(curfile->d_name, FILE_MODE_RO); if (!file1) { curfile = curfile->next; continue; } file2 = fopen((*match)->d_name, FILE_MODE_RO); if (!file2) { fclose(file1); curfile = curfile->next; continue; } if (confirmmatch(file1, file2, curfile->size)) { registerpair(match, curfile, (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename); dupecount++; } fclose(file1); fclose(file2); } skip_full_check: curfile = curfile->next; if (!ISFLAG(flags, F_HIDEPROGRESS)) update_progress(NULL, -1); progress++; } if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%60s\r", " "); skip_file_scan: /* Stop catching CTRL+C */ signal(SIGINT, SIG_DFL); if (ISFLAG(flags, F_DELETEFILES)) { if (ISFLAG(flags, F_NOPROMPT)) deletefiles(files, 0, 0); else deletefiles(files, 1, stdin); } #ifndef NO_SYMLINKS if (ISFLAG(flags, F_MAKESYMLINKS)) linkfiles(files, 0); #endif #ifndef NO_HARDLINKS if (ISFLAG(flags, F_HARDLINKFILES)) linkfiles(files, 1); #endif /* NO_HARDLINKS */ #ifdef ENABLE_DEDUPE if (ISFLAG(flags, F_DEDUPEFILES)) dedupefiles(files); #endif /* ENABLE_DEDUPE */ if (ISFLAG(flags, F_PRINTMATCHES)) printmatches(files); if (ISFLAG(flags, F_PRINTJSON)) printjson(files, argc, argv); if (ISFLAG(flags, F_SUMMARIZEMATCHES)) { if (ISFLAG(flags, F_PRINTMATCHES)) printf("\n\n"); summarizematches(files); } exit(EXIT_SUCCESS); } jdupes-1.18.1/jdupes.1000066400000000000000000000313001370142704600144510ustar00rootroot00000000000000.TH JDUPES 1 .\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection .\" other parms are allowed: see man(7), man(1) .SH NAME jdupes \- finds and performs actions upon duplicate files .SH SYNOPSIS .B jdupes [ .I options ] .I DIRECTORIES \|.\|.\|. .SH "DESCRIPTION" Searches the given path(s) for duplicate files. Such files are found by comparing file sizes, then partial and full file hashes, followed by a byte-by-byte comparison. The default behavior with no other "action options" specified (delete, summarize, link, dedupe, etc.) is to print sets of matching files. .SH OPTIONS .TP .B -@ --loud output annoying low-level debug info while running .TP .B -0 --printnull when printing matches, use null bytes instead of CR/LF bytes, just like 'find -print0' does. This has no effect with any action mode other than the default "print matches" (delete, link, etc. will still print normal line endings in the output.) .TP .B -1 --one-file-system do not match files that are on different filesystems or devices .TP .B -A --nohidden exclude hidden files from consideration .TP .B -B --dedupe issue the btrfs same-extents ioctl to trigger a deduplication on disk. 
The program must be built with btrfs support for this option to be available .TP .B -C --chunksize=\fIBYTES\fR set the I/O chunk size manually; larger values may improve performance on rotating media by reducing the number of head seeks required, but also increases memory usage and can reduce performance in some cases .TP .B -D --debug if this feature is compiled in, show debugging statistics and info at the end of program execution .TP .B -d --delete prompt user for files to preserve, deleting all others (see .B CAVEATS below) .TP .B -f --omitfirst omit the first file in each set of matches .TP .B -H --hardlinks normally, when two or more files point to the same disk area they are treated as non-duplicates; this option will change this behavior .TP .B -h --help displays help .TP .B -i --reverse reverse (invert) the sort order of matches .TP .B -I --isolate isolate each command-line parameter from one another; only match if the files are under different parameter specifications .TP .B -L --linkhard replace all duplicate files with hardlinks to the first file in each set of duplicates .TP .B -m --summarize summarize duplicate file information .TP .B -M --printwithsummary print matches and summarize the duplicate file information at the end .TP .B -N --noprompt when used together with \-\-delete, preserve the first file in each set of duplicates and delete the others without prompting the user .TP .B -n --noempty exclude zero-length files from consideration; this option is the default behavior and does nothing (also see \fB\-z/--zeromatch\fP) .TP .B -O --paramorder parameter order preservation is more important than the chosen sort; this is particularly useful with the \fB\-N\fP option to ensure that automatic deletion behaves in a controllable way .TP .B -o --order\fR=\fIWORD\fR order files according to WORD: time - sort by modification time name - sort by filename (default) .TP .B -p --permissions don't consider files with different owner/group or permission bits as duplicates .TP .B -P --print=type print extra information to stdout; valid options are: early - matches that pass early size/permission/link/etc. checks partial - files whose partial hashes match fullhash - files whose full hashes match .TP .B -Q --quick .B [WARNING: RISK OF DATA LOSS, SEE CAVEATS] skip byte-for-byte verification of duplicate pairs (use hashes only) .TP .B -q --quiet hide progress indicator .TP .B -R --recurse: for each directory given after this option follow subdirectories encountered within (note the ':' at the end of option; see the Examples section below for further explanation) .TP .B -r --recurse for every directory given follow subdirectories encountered within .TP .B -l --linksoft replace all duplicate files with symlinks to the first file in each set of duplicates .TP .B -S --size show size of duplicate files .TP .B -s --symlinks follow symlinked directories .TP .B -T --partial-only .B [WARNING: EXTREME RISK OF DATA LOSS, SEE CAVEATS] match based on hash of first block of file data, ignoring the rest .TP .B -u --printunique print only a list of unique (non-duplicate, unmatched) files .TP .B -v --version display jdupes version and compilation feature flags .TP .B -x --xsize=[+]SIZE (NOTE: deprecated in favor of \-X) exclude files of size less than SIZE from consideration, or if SIZE is prefixed with a '+' i.e. jdupes -x +226 [files] then exclude files larger than SIZE. Suffixes K/M/G can be used. 
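For example, 'jdupes -r -x 1M somedir' will only consider files of one mebibyte (1048576 bytes) or larger.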
.TP .B -X --extfilter=spec:info exclude/filter files based on specified criteria; general format: .B jdupes -X filter[:value][size_suffix] Some filters take no value or multiple values. Filters that can take a numeric option generally support the size multipliers K/M/G/T/P/E with or without an added iB or B. Multipliers are binary-style unless the B is used, which will use decimal multipliers. For example, 10k or 10kib = 10240; 10kb = 10000. Multipliers are case-insensitive. Filters have cumulative effects: jdupes -X size+=:100 -X size-=:100 will cause only files of exactly 100 bytes in size to be included. Supported filters are: .RS .IP `size[+-=]:number[suffix]' match only if size is greater (+), less than (-), or equal to (=) the specified number. The +/- and = specifiers can be combined, e.g. "size+=:4K" will only consider files with a size greater than or equal to four kilobytes (4096 bytes). .IP `noext:ext1[,ext2,...]' exclude files with certain extension(s), specified as a comma-separated list. Do not use a leading dot. .IP `onlyext:ext1[,ext2,...]' only include files with certain extension(s), specified as a comma-separated list. Do not use a leading dot. .IP `nostr:text_string' exclude all paths containing the substring text_string. This scans the full file path, so it can be used to match directories: -X nostr:dir_name/ .IP `onlystr:text_string' require all paths to contain the substring text_string. This scans the full file path, so it can be used to match directories: -X onlystr:dir_name/ .RE .TP .B -z --zeromatch consider zero-length files to be duplicates; this replaces the old default behavior when \fB\-n\fP was not specified .TP .B -Z --softabort if the user aborts the program (as with CTRL-C) act on the matches that were found before the abort was received. For example, if -L and -Z are specified, all matches found prior to the abort will be hard linked. The default behavior without -Z is to abort without taking any actions. .SH NOTES A set of arrows is used in file linking to show what action was taken on each link candidate. These arrows are as follows: .TP .B ----> This file was successfully hard linked to the first file in the duplicate chain .TP .B -@@-> This file was successfully symlinked to the first file in the chain .TP .B -==-> This file was already a hard link to the first file in the chain .TP .B -//-> Linking this file failed due to an error during the linking process .PP Duplicate files are listed together in groups with each file displayed on a separate line. The groups are then separated from each other by blank lines. .SH EXAMPLES .TP .B jdupes a --recurse: b will follow subdirectories under b, but not those under a. .TP .B jdupes a --recurse b will follow subdirectories under both a and b. .TP .B jdupes -O dir1 dir3 dir2 will always place 'dir1' results first in any match set (where relevant) .SH CAVEATS Using .B \-1 or .BR \-\-one\-file\-system prevents matches that cross filesystems, but a more relaxed form of this option may be added that allows cross-matching for all filesystems that each parameter is present on. When using .B \-d or .BR \-\-delete , care should be taken to guard against accidental data loss. .B \-Z or .BR \-\-softabort used to be --hardabort in jdupes prior to v1.5 and had the opposite behavior. Defaulting to taking action on abort is probably not what most users would expect. The decision to invert rather than reassign to a different option was made because this feature was still fairly new at the time of the change.
The .B \-O or .BR \-\-paramorder option allows the user greater control over what appears in the first position of a match set, specifically for keeping the \fB\-N\fP option from deleting all but one file in a set in a seemingly random way. All directories specified on the command line will be used as the sorting order of result sets first, followed by the sorting algorithm set by the \fB\-o\fP or \fB\-\-order\fP option. This means that the order of all match pairs for a single directory specification will retain the old sorting behavior even if this option is specified. When used together with options .B \-s or .BR \-\-symlink , a user could accidentally preserve a symlink while deleting the file it points to. The .B \-Q or .BR \-\-quick option only reads each file once, hashes it, and performs comparisons based solely on the hashes. There is a small but significant risk of a hash collision which is the purpose of the failsafe byte-for-byte comparison that this option explicitly bypasses. Do not use it on ANY data set for which any amount of data loss is unacceptable. This option is not included in the help text for the program due to its risky nature. .B You have been warned! The .B \-T or .BR \-\-partial\-only option produces results based on a hash of the first block of file data in each file, ignoring everything else in the file. Partial hash checks have always been an important exclusion step in the jdupes algorithm, usually hashing the first 4096 bytes of data and allowing files that are different at the start to be rejected early. In certain scenarios it may be a useful heuristic for a user to see that a set of files has the same size and the same starting data, even if the remaining data does not match; one example of this would be comparing files with data blocks that are damaged or missing such as an incomplete file transfer or checking a data recovery against known-good copies to see what damaged data can be deleted in favor of restoring the known-good copy. This option is meant to be used with informational actions and .B can result in EXTREME DATA LOSS if used with options that delete files, create hard links, or perform other destructive actions on data based on the matching output. Because of the potential for massive data destruction, .B this option MUST BE SPECIFIED TWICE to take effect and will error out if it is only specified once. Using the .B \-C or .BR \-\-chunksize option to override I/O chunk size can increase performance on rotating storage media by reducing "head thrashing," reading larger amounts of data sequentially from each file. This tunable size can have bad side effects; the default size maximizes algorithmic performance without regard to the I/O characteristics of any given device and uses a modest amount of memory, but other values may greatly increase memory usage or incur a lot more system call overhead. Try several different values to see how they affect performance for your hardware and data set. This option does not affect match results in any way, so even if it slows down the file matching process it will not hurt anything. 
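For example, 'jdupes -r -C 262144 somedir' reads files in 256 KiB chunks instead of the default size.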
.SH REPORTING BUGS Send bug reports to jody@jodybruchon.com or use the issue tracker at: http://github.com/jbruchon/jdupes/issues .SH SUPPORTING DEVELOPMENT If you find this program useful, please consider financially supporting its continued development by visiting the following URL: https://www.subscribestar.com/JodyBruchon .SH AUTHOR jdupes is created and maintained by Jody Bruchon and was forked from fdupes 1.51 by Adrian Lopez .SH LICENSE The MIT License (MIT) Copyright (C) 2015-2020 Jody Lee Bruchon and contributors Forked from fdupes 1.51, Copyright (C) 1999-2014 Adrian Lopez and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. jdupes-1.18.1/jdupes.c000066400000000000000000002307711370142704600145500ustar00rootroot00000000000000/* jdupes (C) 2015-2020 Jody Bruchon Forked from fdupes 1.51 (C) 1999-2014 Adrian Lopez Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef OMIT_GETOPT_LONG #include #endif #include #include #include #include #include #include "jdupes.h" #include "xxhash.h" #ifdef ENABLE_DEDUPE #include #endif /* Jody Bruchon's helpful functions */ #include "string_malloc.h" #include "jody_sort.h" #include "jody_win_unicode.h" #include "jody_cacheinfo.h" #include "jody_strtoepoch.h" #include "version.h" /* Headers for post-scanning actions */ #include "act_deletefiles.h" #include "act_dedupefiles.h" #include "act_linkfiles.h" #include "act_printmatches.h" #include "act_printjson.h" #include "act_summarize.h" /* Detect Windows and modify as needed */ #if defined _WIN32 || defined __CYGWIN__ const char dir_sep = '\\'; #ifdef UNICODE const wchar_t *FILE_MODE_RO = L"rbS"; #else const char *FILE_MODE_RO = "rbS"; #endif /* UNICODE */ #else /* Not Windows */ const char *FILE_MODE_RO = "rb"; const char dir_sep = '/'; #ifdef UNICODE #error Do not define UNICODE on non-Windows platforms. #undef UNICODE #endif #endif /* _WIN32 || __CYGWIN__ */ /* Windows + Unicode compilation */ #ifdef UNICODE static wpath_t wname, wstr; int out_mode = _O_TEXT; int err_mode = _O_TEXT; #endif /* UNICODE */ #ifndef NO_SYMLINKS #include "jody_paths.h" #endif /* Behavior modification flags */ uint_fast32_t flags = 0, p_flags = 0; static const char *program_name; /* This gets used in many functions */ #ifdef ON_WINDOWS struct winstat s; #else struct stat s; #endif /* Larger chunk size makes large files process faster but uses more RAM */ #define MIN_CHUNK_SIZE 4096 #define MAX_CHUNK_SIZE 16777216 #ifndef CHUNK_SIZE #define CHUNK_SIZE 65536 #endif #ifndef PARTIAL_HASH_SIZE #define PARTIAL_HASH_SIZE 4096 #endif static size_t auto_chunk_size = CHUNK_SIZE; /* Maximum path buffer size to use; must be large enough for a path plus * any work that might be done to the array it's stored in. PATH_MAX is * not always true. 
Read this article on the false promises of PATH_MAX: * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html * Windows + Unicode needs a lot more space than UTF-8 in Linux/Mac OS X */ #ifndef PATHBUF_SIZE #define PATHBUF_SIZE 4096 #endif /* Refuse to build if PATHBUF_SIZE is too small */ #if PATHBUF_SIZE < PATH_MAX #error "PATHBUF_SIZE can't be less than PATH_MAX" #endif /* Size suffixes - this gets exported */ const struct size_suffix size_suffix[] = { /* Byte (someone may actually try to use this) */ { "b", 1 }, { "k", 1024 }, { "kib", 1024 }, { "m", 1048576 }, { "mib", 1048576 }, { "g", (uint64_t)1048576 * 1024 }, { "gib", (uint64_t)1048576 * 1024 }, { "t", (uint64_t)1048576 * 1048576 }, { "tib", (uint64_t)1048576 * 1048576 }, { "p", (uint64_t)1048576 * 1048576 * 1024}, { "pib", (uint64_t)1048576 * 1048576 * 1024}, { "e", (uint64_t)1048576 * 1048576 * 1048576}, { "eib", (uint64_t)1048576 * 1048576 * 1048576}, /* Decimal suffixes */ { "kb", 1000 }, { "mb", 1000000 }, { "gb", 1000000000 }, { "tb", 1000000000000 }, { "pb", 1000000000000000 }, { "eb", 1000000000000000000 }, { NULL, 0 }, }; /* Assemble extension string from compile-time options */ const char *extensions[] = { #ifdef ON_WINDOWS "windows", #endif #ifdef UNICODE "unicode", #endif #ifdef OMIT_GETOPT_LONG "nolong", #endif #ifdef __FAST_MATH__ "fastmath", #endif #ifdef DEBUG "debug", #endif #ifdef LOUD_DEBUG "loud", #endif #ifdef ENABLE_DEDUPE "fsdedup", #endif #ifdef LOW_MEMORY "lowmem", #endif #ifdef SMA_PAGE_SIZE "smapage", #endif #ifdef NO_PERMS "noperm", #endif #ifdef NO_HARDLINKS "nohardlink", #endif #ifdef NO_SYMLINKS "nosymlink", #endif #ifdef NO_USER_ORDER "nouserorder", #endif NULL }; /* Tree to track each directory traversed */ struct travdone { struct travdone *left; struct travdone *right; jdupes_ino_t inode; dev_t device; }; static struct travdone *travdone_head = NULL; /* Extended filter tree head and static tag list */ struct extfilter *extfilter_head = NULL; const struct extfilter_tags extfilter_tags[] = { { "noext", XF_EXCL_EXT }, { "onlyext", XF_ONLY_EXT }, { "size+", XF_SIZE_GT }, { "size-", XF_SIZE_LT }, { "size+=", XF_SIZE_GTEQ }, { "size-=", XF_SIZE_LTEQ }, { "size=", XF_SIZE_EQ }, { "nostr", XF_EXCL_STR }, { "onlystr", XF_ONLY_STR }, { "newer", XF_DATE_NEWER }, { "older", XF_DATE_OLDER }, { NULL, 0 }, }; /* Required for progress indicator code */ static uintmax_t filecount = 0; static uintmax_t progress = 0, item_progress = 0, dupecount = 0; /* Number of read loops before checking progress indicator */ #define CHECK_MINIMUM 256 /* Hash/compare performance statistics (debug mode) */ #ifdef DEBUG static unsigned int small_file = 0, partial_hash = 0, partial_elim = 0; static unsigned int full_hash = 0, partial_to_full = 0, hash_fail = 0; static uintmax_t comparisons = 0; static unsigned int left_branch = 0, right_branch = 0; #ifdef ON_WINDOWS #ifndef NO_HARDLINKS static unsigned int hll_exclude = 0; #endif #endif #endif /* DEBUG */ #ifdef TREE_DEPTH_STATS static unsigned int tree_depth = 0; static unsigned int max_depth = 0; #endif /* File tree head */ static filetree_t *checktree = NULL; /* Directory/file parameter position counter */ static unsigned int user_item_count = 1; /* registerfile() direction options */ enum tree_direction { NONE, LEFT, RIGHT }; /* Sort order reversal */ static int sort_direction = 1; /* Signal handler */ static int interrupt = 0; /* Progress indicator time */ struct timeval time1, time2; /* For path name mangling */ char tempname[PATHBUF_SIZE * 2]; /* Compare two hashes 
like memcmp() */ #define HASH_COMPARE(a,b) ((a > b) ? 1:((a == b) ? 0:-1)) static void help_text_extfilter(void); /***** End definitions, begin code *****/ /* Catch CTRL-C and either notify or terminate */ void sighandler(const int signum) { (void)signum; if (interrupt || !ISFLAG(flags, F_SOFTABORT)) { fprintf(stderr, "\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } interrupt = 1; return; } #ifndef ON_WINDOWS void sigusr1(const int signum) { (void)signum; if (!ISFLAG(flags, F_SOFTABORT)) SETFLAG(flags, F_SOFTABORT); else CLEARFLAG(flags, F_SOFTABORT); return; } #endif /* Out of memory */ extern void oom(const char * const restrict msg) { fprintf(stderr, "\nout of memory: %s\n", msg); string_malloc_destroy(); exit(EXIT_FAILURE); } /* Null pointer failure */ extern void nullptr(const char * restrict func) { static const char n[] = "(NULL)"; if (func == NULL) func = n; fprintf(stderr, "\ninternal error: NULL pointer passed to %s\n", func); string_malloc_destroy(); exit(EXIT_FAILURE); } static inline char **cloneargs(const int argc, char **argv) { static int x; static char **args; args = (char **)string_malloc(sizeof(char *) * (unsigned int)argc); if (args == NULL) oom("cloneargs() start"); for (x = 0; x < argc; x++) { args[x] = (char *)string_malloc(strlen(argv[x]) + 1); if (args[x] == NULL) oom("cloneargs() loop"); strcpy(args[x], argv[x]); } return args; } static int findarg(const char * const arg, const int start, const int argc, char **argv) { int x; for (x = start; x < argc; x++) if (strcmp(argv[x], arg) == 0) return x; return x; } /* Find the first non-option argument after specified option. */ static int nonoptafter(const char *option, const int argc, char **oldargv, char **newargv) { int x; int targetind; int testind; int startat = 1; targetind = findarg(option, 1, argc, oldargv); for (x = optind; x < argc; x++) { testind = findarg(newargv[x], startat, argc, oldargv); if (testind > targetind) return x; else startat = testind; } return x; } /* Update progress indicator if requested */ static void update_progress(const char * const restrict msg, const int file_percent) { static int did_fpct = 0; /* The caller should be doing this anyway...but don't trust that they did */ if (ISFLAG(flags, F_HIDEPROGRESS)) return; gettimeofday(&time2, NULL); if (progress == 0 || time2.tv_sec > time1.tv_sec) { fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%", progress, filecount, dupecount, (progress * 100) / filecount); if (file_percent > -1 && msg != NULL) { fprintf(stderr, " (%s: %d%%) ", msg, file_percent); did_fpct = 1; } else if (did_fpct != 0) { fprintf(stderr, " "); did_fpct = 0; } fflush(stderr); } time1.tv_sec = time2.tv_sec; return; } /***** Add new functions here *****/ /* Does a file have one of these comma-separated extensions? 
* Returns 1 after any match, 0 if no matches */ int match_extensions(char *path, const char *extlist) { char *dot; const char *ext; size_t len; LOUD(fprintf(stderr, "match_extensions('%s', '%s')\n", path, extlist);) if (path == NULL || extlist == NULL) nullptr("match_extensions"); dot = NULL; /* Scan to end of path, save the last dot, reset on path separators */ while (*path != '\0') { if (*path == '.') dot = path; if (*path == '/' || *path == '\\') dot = NULL; path++; } /* No dots in the file name = no extension, so give up now */ if (dot == NULL) return 0; dot++; /* Handle a dot at the end of a file name */ if (*dot == '\0') return 0; /* dot is now at the location of the last file extension; check the list */ while (*extlist == ',') extlist++; ext = extlist; len = 0; while (1) { if (*extlist == '\0' && len == 0) return 0; if (*extlist == ',' || *extlist == '\0') { while (*extlist == ',') extlist++; if (extlist == ext) goto skip_empty; /* Require the whole extension to match, not just a prefix of it */ if (strncmp(dot, ext, len) == 0 && *(dot + len) == '\0') { LOUD(fprintf(stderr, "match_extensions: matched on extension '%s'\n", dot);) return 1; } skip_empty: ext = extlist; if (*extlist != '\0') extlist++; len = 0; continue; } extlist++; len++; } return 0; }
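/* Illustration only (never compiled): expected results from match_extensions()
 * above for a few sample inputs. The list is comma-separated, the comparison
 * anchors on the last dot of the final path component, and a listed extension
 * must match the file's whole extension rather than just a prefix of it. */
#if 0
match_extensions("photos/cat.jpg", "jpg,png"); /* 1: extension is in the list */
match_extensions("notes.txt", "jpg,png");      /* 0: extension not in the list */
match_extensions("archive.tar.gz", "gz");      /* 1: only the last extension counts */
match_extensions("file.cpp", "c");             /* 0: whole-extension match required */
match_extensions("dir.d/README", "d");         /* 0: a dot before a separator is discarded */
#endif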
/* Check file's stat() info to make sure nothing has changed * Returns 1 if changed, 0 if not changed, negative if error */ extern int file_has_changed(file_t * const restrict file) { /* If -t/--nochangecheck specified then completely bypass this code */ if (ISFLAG(flags, F_NOCHANGECHECK)) return 0; if (file == NULL || file->d_name == NULL) nullptr("file_has_changed()"); LOUD(fprintf(stderr, "file_has_changed('%s')\n", file->d_name);) if (!ISFLAG(file->flags, FF_VALID_STAT)) return -66; if (STAT(file->d_name, &s) != 0) return -2; if (file->inode != s.st_ino) return 1; if (file->size != s.st_size) return 1; if (file->device != s.st_dev) return 1; if (file->mtime != s.st_mtime) return 1; if (file->mode != s.st_mode) return 1; #ifndef NO_PERMS if (file->uid != s.st_uid) return 1; if (file->gid != s.st_gid) return 1; #endif #ifndef NO_SYMLINKS if (lstat(file->d_name, &s) != 0) return -3; if ((S_ISLNK(s.st_mode) > 0) ^ ISFLAG(file->flags, FF_IS_SYMLINK)) return 1; #endif return 0; } extern inline int getfilestats(file_t * const restrict file) { if (file == NULL || file->d_name == NULL) nullptr("getfilestats()"); LOUD(fprintf(stderr, "getfilestats('%s')\n", file->d_name);) /* Don't stat the same file more than once */ if (ISFLAG(file->flags, FF_VALID_STAT)) return 0; SETFLAG(file->flags, FF_VALID_STAT); if (STAT(file->d_name, &s) != 0) return -1; file->inode = s.st_ino; file->size = s.st_size; file->device = s.st_dev; file->mtime = s.st_mtime; file->mode = s.st_mode; #ifndef NO_HARDLINKS file->nlink = s.st_nlink; #endif #ifndef NO_PERMS file->uid = s.st_uid; file->gid = s.st_gid; #endif #ifndef NO_SYMLINKS if (lstat(file->d_name, &s) != 0) return -1; if (S_ISLNK(s.st_mode) > 0) SETFLAG(file->flags, FF_IS_SYMLINK); #endif return 0; } static void add_extfilter(const char *option) { char *opt, *p; time_t tt; struct extfilter *extf = extfilter_head; const struct extfilter_tags *tags = extfilter_tags; const struct size_suffix *ss = size_suffix; if (option == NULL) nullptr("add_extfilter()"); LOUD(fprintf(stderr, "add_extfilter '%s'\n", option);) /* Invoke help text if requested */ if (strcasecmp(option, "help") == 0) { help_text_extfilter(); exit(EXIT_SUCCESS); } opt = string_malloc(strlen(option) + 1); if (opt == NULL) oom("add_extfilter option"); strcpy(opt, option); p = opt; while (*p != ':' && *p != '\0') p++; /* Split tag string into *opt (tag) and *p (value) */ if (*p == ':') { *p = '\0'; p++; } while (tags->tag != NULL && strcmp(tags->tag, opt) != 0) tags++; if (tags->tag == NULL) goto error_bad_filter; /* Check for a tag that requires a value */ if (tags->flags & XF_REQ_VALUE && *p == '\0') goto error_value_missing; /* *p is now at the value, NOT the tag string! */ if (extfilter_head != NULL) { /* Add to end of exclusion stack if head is present */ while (extf->next != NULL) extf = extf->next; extf->next = string_malloc(sizeof(struct extfilter) + strlen(p) + 1); if (extf->next == NULL) oom("add_extfilter alloc"); extf = extf->next; } else { /* Allocate extfilter_head if no exclusions exist yet */ extfilter_head = string_malloc(sizeof(struct extfilter) + strlen(p) + 1); if (extfilter_head == NULL) oom("add_extfilter alloc"); extf = extfilter_head; } /* Set tag value from predefined tag array */ extf->flags = tags->flags; /* Initialize the new extfilter element */ extf->next = NULL; if (extf->flags & XF_REQ_NUMBER) { /* Exclude uses a number; handle it with possible suffixes */ *(extf->param) = '\0'; /* Get base size */ if (*p < '0' || *p > '9') goto error_bad_size_suffix; extf->size = strtoll(p, &p, 10); /* Handle suffix, if any */ if (*p != '\0') { while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++; if (ss->suffix == NULL) goto error_bad_size_suffix; extf->size *= ss->multiplier; } } else if (extf->flags & XF_REQ_DATE) { *(extf->param) = '\0'; tt = strtoepoch(p); LOUD(fprintf(stderr, "extfilter: jody_strtoepoch: '%s' -> %ld\n", p, tt);) if (tt == -1) goto error_bad_time; extf->size = tt; } else { /* Exclude uses string data; just copy it */ extf->size = 0; if (*p != '\0') strcpy(extf->param, p); else *(extf->param) = '\0'; } LOUD(fprintf(stderr, "Added extfilter: tag '%s', data '%s', size %lld, flags %d\n", opt, extf->param, (long long)extf->size, extf->flags);) string_free(opt); return; error_bad_time: fprintf(stderr, "Invalid extfilter date[time] was specified: -X filter:datetime\n"); help_text_extfilter(); exit(EXIT_FAILURE); error_value_missing: fprintf(stderr, "extfilter value missing or invalid: -X filter:value\n"); help_text_extfilter(); exit(EXIT_FAILURE); error_bad_filter: fprintf(stderr, "Invalid extfilter filter name was specified\n"); help_text_extfilter(); exit(EXIT_FAILURE); error_bad_size_suffix: fprintf(stderr, "Invalid extfilter size suffix specified; use B or KMGTPE[i][B]\n"); help_text_extfilter(); exit(EXIT_FAILURE); }
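/* Illustration only (never compiled): a minimal standalone sketch of the
 * suffix arithmetic add_extfilter() performs above, using a trimmed copy of
 * the size_suffix table. For example, "-X size+:100k" stores 100 * 1024 =
 * 102400, while the decimal suffix in "100kb" stores 100 * 1000 = 100000. */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>  /* POSIX strcasecmp(), as used by add_extfilter() */

struct demo_suffix { const char *suffix; uint64_t multiplier; };
static const struct demo_suffix demo_suffixes[] = {
  { "k", 1024 }, { "kib", 1024 }, { "kb", 1000 },
  { "m", 1048576 }, { "mib", 1048576 }, { "mb", 1000000 },
  { NULL, 0 }
};

/* Parse "100k" -> 102400; returns -1 on an unknown suffix */
static long long demo_parse_size(const char *arg)
{
  char *p;
  long long size = strtoll(arg, &p, 10);
  if (*p != '\0') {
    const struct demo_suffix *ss = demo_suffixes;
    while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++;
    if (ss->suffix == NULL) return -1;
    size *= (long long)ss->multiplier;
  }
  return size;
}

int main(void)
{
  printf("100k  -> %lld\n", demo_parse_size("100k"));  /* 102400 */
  printf("100kb -> %lld\n", demo_parse_size("100kb")); /* 100000 */
  return 0;
}
#endif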
nullptr("check_conditions()"); LOUD(fprintf(stderr, "check_conditions('%s', '%s')\n", file1->d_name, file2->d_name);) /* Exclude files that are not the same size */ if (file1->size > file2->size) { LOUD(fprintf(stderr, "check_conditions: no match: size of file1 > file2 (%" PRIdMAX " > %" PRIdMAX ")\n", (intmax_t)file1->size, (intmax_t)file2->size)); return -1; } if (file1->size < file2->size) { LOUD(fprintf(stderr, "check_conditions: no match: size of file1 < file2 (%" PRIdMAX " < %"PRIdMAX ")\n", (intmax_t)file1->size, (intmax_t)file2->size)); return 1; } #ifndef NO_USER_ORDER /* Exclude based on -I/--isolate */ if (ISFLAG(flags, F_ISOLATE) && (file1->user_order == file2->user_order)) { LOUD(fprintf(stderr, "check_conditions: files ignored: parameter isolation\n")); return -3; } #endif /* NO_USER_ORDER */ /* Exclude based on -1/--one-file-system */ if (ISFLAG(flags, F_ONEFS) && (file1->device != file2->device)) { LOUD(fprintf(stderr, "check_conditions: files ignored: not on same filesystem\n")); return -4; } /* Exclude files by permissions if requested */ if (ISFLAG(flags, F_PERMISSIONS) && (file1->mode != file2->mode #ifndef NO_PERMS || file1->uid != file2->uid || file1->gid != file2->gid #endif )) { return -5; LOUD(fprintf(stderr, "check_conditions: no match: permissions/ownership differ (-p on)\n")); } /* Hard link and symlink + '-s' check */ #ifndef NO_HARDLINKS if ((file1->inode == file2->inode) && (file1->device == file2->device)) { if (ISFLAG(flags, F_CONSIDERHARDLINKS)) { LOUD(fprintf(stderr, "check_conditions: files match: hard/soft linked (-H on)\n")); return 2; } else { LOUD(fprintf(stderr, "check_conditions: files ignored: hard/soft linked (-H off)\n")); return -2; } } #endif /* Fall through: all checks passed */ LOUD(fprintf(stderr, "check_conditions: all condition checks passed\n")); return 0; } /* Check for exclusion conditions for a single file (1 = fail) */ static int check_singlefile(file_t * const restrict newfile) { char * restrict tp = tempname; int excluded; if (newfile == NULL) nullptr("check_singlefile()"); LOUD(fprintf(stderr, "check_singlefile: checking '%s'\n", newfile->d_name)); /* Exclude hidden files if requested */ if (ISFLAG(flags, F_EXCLUDEHIDDEN)) { if (newfile->d_name == NULL) nullptr("check_singlefile newfile->d_name"); strcpy(tp, newfile->d_name); tp = basename(tp); if (tp[0] == '.' 
&& strcmp(tp, ".") && strcmp(tp, "..")) { LOUD(fprintf(stderr, "check_singlefile: excluding hidden file (-A on)\n")); return 1; } } /* Get file information and check for validity */ const int i = getfilestats(newfile); if (i || newfile->size == -1) { LOUD(fprintf(stderr, "check_singlefile: excluding due to bad stat()\n")); return 1; } if (!S_ISDIR(newfile->mode)) { /* Exclude zero-length files if requested */ if (newfile->size == 0 && !ISFLAG(flags, F_INCLUDEEMPTY)) { LOUD(fprintf(stderr, "check_singlefile: excluding zero-length empty file (-z not set)\n")); return 1; } /* Exclude files based on exclusion stack size specs */ excluded = 0; for (struct extfilter *extf = extfilter_head; extf != NULL; extf = extf->next) { uint32_t sflag = extf->flags; LOUD(fprintf(stderr, "check_singlefile: extfilter check: %08x %ld %ld %s\n", sflag, newfile->size, extf->size, newfile->d_name);) if ( ((sflag == XF_SIZE_EQ) && (newfile->size != extf->size)) || ((sflag == XF_SIZE_LTEQ) && (newfile->size <= extf->size)) || ((sflag == XF_SIZE_GTEQ) && (newfile->size >= extf->size)) || ((sflag == XF_SIZE_GT) && (newfile->size > extf->size)) || ((sflag == XF_SIZE_LT) && (newfile->size < extf->size)) || ((sflag == XF_EXCL_EXT) && match_extensions(newfile->d_name, extf->param)) || ((sflag == XF_ONLY_EXT) && !match_extensions(newfile->d_name, extf->param)) || ((sflag == XF_EXCL_STR) && strstr(newfile->d_name, extf->param)) || ((sflag == XF_ONLY_STR) && !strstr(newfile->d_name, extf->param)) || ((sflag == XF_DATE_NEWER) && (newfile->mtime >= extf->size)) || ((sflag == XF_DATE_OLDER) && (newfile->mtime < extf->size)) ) excluded = 1; } if (excluded) { LOUD(fprintf(stderr, "check_singlefile: excluding based on an extfilter option\n")); return 1; } } #ifdef ON_WINDOWS /* Windows has a 1023 (+1) hard link limit. 
If we're hard linking, * ignore all files that have hit this limit */ #ifndef NO_HARDLINKS if (ISFLAG(flags, F_HARDLINKFILES) && newfile->nlink >= 1024) { #ifdef DEBUG hll_exclude++; #endif LOUD(fprintf(stderr, "check_singlefile: excluding due to Windows 1024 hard link limit\n")); return 1; } #endif /* NO_HARDLINKS */ #endif /* ON_WINDOWS */ LOUD(fprintf(stderr, "check_singlefile: all checks passed\n")); return 0; } static file_t *init_newfile(const size_t len, file_t * restrict * const restrict filelistp) { file_t * const restrict newfile = (file_t *)string_malloc(sizeof(file_t)); if (!newfile) oom("init_newfile() file structure"); if (!filelistp) nullptr("init_newfile() filelistp"); LOUD(fprintf(stderr, "init_newfile(len %lu, filelistp %p)\n", len, filelistp)); memset(newfile, 0, sizeof(file_t)); newfile->d_name = (char *)string_malloc(len); if (!newfile->d_name) oom("init_newfile() filename"); newfile->next = *filelistp; #ifndef NO_USER_ORDER newfile->user_order = user_item_count; #endif newfile->size = -1; newfile->duplicates = NULL; return newfile; } /* Create a new traversal check object and initialize its values */ static struct travdone *travdone_alloc(const dev_t device, const jdupes_ino_t inode) { struct travdone *trav; LOUD(fprintf(stderr, "travdone_alloc(%" PRIdMAX ", %" PRIdMAX ")\n", (intmax_t)inode, (intmax_t)device);) trav = (struct travdone *)string_malloc(sizeof(struct travdone)); if (trav == NULL) { LOUD(fprintf(stderr, "travdone_alloc: malloc failed\n");) return NULL; } trav->left = NULL; trav->right = NULL; trav->inode = inode; trav->device = device; LOUD(fprintf(stderr, "travdone_alloc returned %p\n", (void *)trav);) return trav; } /* De-allocate the travdone tree */ static void travdone_free(struct travdone * const restrict cur) { if (cur == NULL) return; if (cur->left != NULL) travdone_free(cur->left); if (cur->right != NULL) travdone_free(cur->right); string_free(cur); return; } /* Check to see if device:inode pair has already been traversed */ static int traverse_check(const dev_t device, const jdupes_ino_t inode) { struct travdone *traverse = travdone_head; if (travdone_head == NULL) { travdone_head = travdone_alloc(device, inode); if (travdone_head == NULL) return 2; } else { traverse = travdone_head; while (1) { if (traverse == NULL) nullptr("traverse_check()"); /* Don't re-traverse directories we've already seen */ if (inode == traverse->inode && device == traverse->device) { LOUD(fprintf(stderr, "traverse_check: already seen: %ld:%ld\n", device,inode);) return 1; } else if (inode > traverse->inode || (inode == traverse->inode && device > traverse->device)) { /* Traverse right */ if (traverse->right == NULL) { LOUD(fprintf(stderr, "traverse item right: %ld:%ld\n", device, inode);) traverse->right = travdone_alloc(device, inode); if (traverse->right == NULL) return 2; break; } traverse = traverse->right; continue; } else { /* Traverse left */ if (traverse->left == NULL) { LOUD(fprintf(stderr, "traverse item left %ld,%ld\n", device, inode);) traverse->left = travdone_alloc(device, inode); if (traverse->left == NULL) return 2; break; } traverse = traverse->left; continue; } } } return 0; } /* This is disabled until a check is in place to make it safe */ #if 0 /* Add a single file to the file tree */ static inline file_t *grokfile(const char * const restrict name, file_t * restrict * const restrict filelistp) { file_t * restrict newfile; if (!name || !filelistp) nullptr("grokfile()"); LOUD(fprintf(stderr, "grokfile: '%s' %p\n", name, filelistp)); /* Allocate the 
file_t and the d_name entries */ newfile = init_newfile(strlen(name) + 2, filelistp); strcpy(newfile->d_name, name); /* Single-file [l]stat() and exclusion condition check */ if (check_singlefile(newfile) != 0) { LOUD(fprintf(stderr, "grokfile: check_singlefile rejected file\n")); string_free(newfile->d_name); string_free(newfile); return NULL; } return newfile; } #endif /* Load a directory's contents into the file tree, recursing as needed */ static void grokdir(const char * const restrict dir, file_t * restrict * const restrict filelistp, int recurse) { file_t * restrict newfile; struct dirent *dirinfo; static int grokdir_level = 0; size_t dirlen; int i, single = 0; jdupes_ino_t inode; dev_t device, n_device; jdupes_mode_t mode; #ifdef UNICODE WIN32_FIND_DATA ffd; HANDLE hFind = INVALID_HANDLE_VALUE; char *p; #else DIR *cd; #endif if (dir == NULL || filelistp == NULL) nullptr("grokdir()"); LOUD(fprintf(stderr, "grokdir: scanning '%s' (order %d, recurse %d)\n", dir, user_item_count, recurse)); /* Get directory stats (or file stats if it's a file) */ i = getdirstats(dir, &inode, &device, &mode); if (i < 0) goto error_travdone; /* if dir is actually a file, just add it to the file tree */ if (i == 1) { /* Single file addition is disabled for now because there is no safeguard * against the file being compared against itself if it's added in both a * recursion and explicitly on the command line. */ #if 0 LOUD(fprintf(stderr, "grokdir -> grokfile '%s'\n", dir)); newfile = grokfile(dir, filelistp); if (newfile == NULL) { LOUD(fprintf(stderr, "grokfile rejected '%s'\n", dir)); return; } single = 1; goto add_single_file; #endif fprintf(stderr, "\nFile specs on command line disabled in this version for safety\n"); fprintf(stderr, "This should be restored (and safe) in a future release\n"); fprintf(stderr, "See https://github.com/jbruchon/jdupes or email jody@jodybruchon.com\n"); return; /* Remove when single file is restored */ } /* Double traversal prevention tree */ i = traverse_check(device, inode); if (i == 1) return; if (i == 2) goto error_travdone; item_progress++; grokdir_level++; #ifdef UNICODE /* Windows requires \* at the end of directory names */ strncpy(tempname, dir, PATHBUF_SIZE * 2 - 1); dirlen = strlen(tempname) - 1; p = tempname + dirlen; if (*p == '/' || *p == '\\') *p = '\0'; strncat(tempname, "\\*", PATHBUF_SIZE * 2 - 1); if (!M2W(tempname, wname)) goto error_cd; LOUD(fprintf(stderr, "FindFirstFile: %s\n", dir)); hFind = FindFirstFileW(wname, &ffd); if (hFind == INVALID_HANDLE_VALUE) { LOUD(fprintf(stderr, "\nfile handle bad\n")); goto error_cd; } LOUD(fprintf(stderr, "Loop start\n")); do { char * restrict tp = tempname; size_t d_name_len; /* Get necessary length and allocate d_name */ dirinfo = (struct dirent *)string_malloc(sizeof(struct dirent)); if (!W2M(ffd.cFileName, dirinfo->d_name)) continue; #else cd = opendir(dir); if (!cd) goto error_cd; while ((dirinfo = readdir(cd)) != NULL) { char * restrict tp = tempname; size_t d_name_len; #endif /* UNICODE */ LOUD(fprintf(stderr, "grokdir: readdir: '%s'\n", dirinfo->d_name)); if (!strcmp(dirinfo->d_name, ".") || !strcmp(dirinfo->d_name, "..")) continue; if (!ISFLAG(flags, F_HIDEPROGRESS)) { gettimeofday(&time2, NULL); if (progress == 0 || time2.tv_sec > time1.tv_sec) { fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " dirs (in %u specified)", progress, item_progress, user_item_count); } time1.tv_sec = time2.tv_sec; } /* Assemble the file's full path name, optimized to avoid strcat() */ dirlen = strlen(dir); 
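/* Illustration only (never compiled): the join performed around this point as
 * a self-contained sketch. Copying with memcpy() and a running length avoids
 * strcat(), which would rescan the destination string on every append. */
#if 0
#include <stdio.h>
#include <string.h>

#define DEMO_BUFSZ 4096

/* Join dir + '/' + name into out; returns 0 on success, -1 on overflow */
static int demo_join(char *out, const char *dir, const char *name)
{
  size_t dlen = strlen(dir);
  size_t nlen = strlen(name);

  if (dlen + nlen + 2 > DEMO_BUFSZ) return -1; /* room for separator + NUL */
  memcpy(out, dir, dlen);
  if (dlen != 0 && out[dlen - 1] != '/') out[dlen++] = '/';
  memcpy(out + dlen, name, nlen + 1); /* the + 1 copies the terminating NUL */
  return 0;
}

int main(void)
{
  char path[DEMO_BUFSZ];
  if (demo_join(path, "/tmp/dir", "file.txt") == 0) puts(path);
  return 0;
}
#endif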
d_name_len = strlen(dirinfo->d_name); memcpy(tp, dir, dirlen+1); if (dirlen != 0 && tp[dirlen-1] != dir_sep) { tp[dirlen] = dir_sep; dirlen++; } if (dirlen + d_name_len + 1 >= (PATHBUF_SIZE * 2)) goto error_overflow; tp += dirlen; memcpy(tp, dirinfo->d_name, d_name_len); tp += d_name_len; *tp = '\0'; d_name_len++; /* Allocate the file_t and the d_name entries */ newfile = init_newfile(dirlen + d_name_len + 2, filelistp); tp = tempname; memcpy(newfile->d_name, tp, dirlen + d_name_len); /*** WARNING: tempname global gets reused by check_singlefile here! ***/ /* Single-file [l]stat() and exclusion condition check */ if (check_singlefile(newfile) != 0) { LOUD(fprintf(stderr, "grokdir: check_singlefile rejected file\n")); string_free(newfile->d_name); string_free(newfile); continue; } /* Optionally recurse directories, including symlinked ones if requested */ if (S_ISDIR(newfile->mode)) { if (recurse) { /* --one-file-system - WARNING: this clobbers inode/mode */ if (ISFLAG(flags, F_ONEFS) && (getdirstats(newfile->d_name, &inode, &n_device, &mode) == 0) && (device != n_device)) { LOUD(fprintf(stderr, "grokdir: directory: not recursing (--one-file-system)\n")); string_free(newfile->d_name); string_free(newfile); continue; } #ifndef NO_SYMLINKS else if (ISFLAG(flags, F_FOLLOWLINKS) || !ISFLAG(newfile->flags, FF_IS_SYMLINK)) { LOUD(fprintf(stderr, "grokdir: directory(symlink): recursing (-r/-R)\n")); grokdir(newfile->d_name, filelistp, recurse); } #else else { LOUD(fprintf(stderr, "grokdir: directory: recursing (-r/-R)\n")); grokdir(newfile->d_name, filelistp, recurse); } #endif } else { LOUD(fprintf(stderr, "grokdir: directory: not recursing\n")); } string_free(newfile->d_name); string_free(newfile); continue; } else { //add_single_file: /* Add regular files to list, including symlink targets if requested */ #ifndef NO_SYMLINKS if (!ISFLAG(newfile->flags, FF_IS_SYMLINK) || (ISFLAG(newfile->flags, FF_IS_SYMLINK) && ISFLAG(flags, F_FOLLOWLINKS))) { #else if (S_ISREG(newfile->mode)) { #endif *filelistp = newfile; filecount++; progress++; } else { LOUD(fprintf(stderr, "grokdir: not a regular file: %s\n", newfile->d_name);) string_free(newfile->d_name); string_free(newfile); if (single == 1) { single = 0; goto skip_single; } continue; } } /* Skip directory stuff if adding only a single file */ if (single == 1) { single = 0; goto skip_single; } } #ifdef UNICODE while (FindNextFileW(hFind, &ffd) != 0); FindClose(hFind); #else closedir(cd); #endif skip_single: grokdir_level--; if (grokdir_level == 0 && !ISFLAG(flags, F_HIDEPROGRESS)) { fprintf(stderr, "\rScanning: %" PRIuMAX " files, %" PRIuMAX " items (in %u specified)", progress, item_progress, user_item_count); } return; error_travdone: fprintf(stderr, "\ncould not stat dir "); fwprint(stderr, dir, 1); return; error_cd: fprintf(stderr, "\ncould not chdir to "); fwprint(stderr, dir, 1); return; error_overflow: fprintf(stderr, "\nerror: a path buffer overflowed\n"); exit(EXIT_FAILURE); } /* Hash part or all of a file */ static jdupes_hash_t *get_filehash(const file_t * const restrict checkfile, const size_t max_read) { off_t fsize; /* This is an array because we return a pointer to it */ static jdupes_hash_t hash[1]; static jdupes_hash_t *chunk = NULL; FILE *file; int check = 0; XXH64_state_t *xxhstate; if (checkfile == NULL || checkfile->d_name == NULL) nullptr("get_filehash()"); LOUD(fprintf(stderr, "get_filehash('%s', %" PRIdMAX ")\n", checkfile->d_name, (intmax_t)max_read);) /* Allocate on first use */ if (chunk == NULL) { chunk = (jdupes_hash_t 
*)string_malloc(auto_chunk_size); if (!chunk) oom("get_filehash() chunk"); } /* Get the file size. If we can't read it, bail out early */ if (checkfile->size == -1) { LOUD(fprintf(stderr, "get_filehash: not hashing because stat() info is bad\n")); return NULL; } fsize = checkfile->size; /* Do not read more than the requested number of bytes */ if (max_read > 0 && fsize > (off_t)max_read) fsize = (off_t)max_read; /* Initialize the hash and file read parameters (with filehash_partial skipped) * * If we already hashed the first chunk of this file, we don't want to * wastefully read and hash it again, so skip the first chunk and use * the computed hash for that chunk as our starting point. */ *hash = 0; if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) { *hash = checkfile->filehash_partial; /* Don't bother going further if max_read is already fulfilled */ if (max_read != 0 && max_read <= PARTIAL_HASH_SIZE) { LOUD(fprintf(stderr, "Partial hash size (%d) >= max_read (%" PRIuMAX "), not hashing anymore\n", PARTIAL_HASH_SIZE, (uintmax_t)max_read);) return hash; } } errno = 0; #ifdef UNICODE if (!M2W(checkfile->d_name, wstr)) file = NULL; else file = _wfopen(wstr, FILE_MODE_RO); #else file = fopen(checkfile->d_name, FILE_MODE_RO); #endif if (file == NULL) { fprintf(stderr, "\n%s error opening file ", strerror(errno)); fwprint(stderr, checkfile->d_name, 1); return NULL; } /* Actually seek past the first chunk if applicable * This is part of the filehash_partial skip optimization */ if (ISFLAG(checkfile->flags, FF_HASH_PARTIAL)) { if (fseeko(file, PARTIAL_HASH_SIZE, SEEK_SET) == -1) { fclose(file); fprintf(stderr, "\nerror seeking in file "); fwprint(stderr, checkfile->d_name, 1); return NULL; } fsize -= PARTIAL_HASH_SIZE; } xxhstate = XXH64_createState(); if (xxhstate == NULL) nullptr("xxhstate"); XXH64_reset(xxhstate, 0); /* Read the file in CHUNK_SIZE chunks until we've read it all. */ while (fsize > 0) { size_t bytes_to_read; if (interrupt) return 0; bytes_to_read = (fsize >= (off_t)auto_chunk_size) ? 
auto_chunk_size : (size_t)fsize; if (fread((void *)chunk, bytes_to_read, 1, file) != 1) { fprintf(stderr, "\nerror reading from file "); fwprint(stderr, checkfile->d_name, 1); fclose(file); return NULL; } XXH64_update(xxhstate, chunk, bytes_to_read); if ((off_t)bytes_to_read > fsize) break; else fsize -= (off_t)bytes_to_read; if (!ISFLAG(flags, F_HIDEPROGRESS)) { check++; if (check > CHECK_MINIMUM) { update_progress("hashing", (int)(((checkfile->size - fsize) * 100) / checkfile->size)); check = 0; } } } fclose(file); *hash = XXH64_digest(xxhstate); XXH64_freeState(xxhstate); LOUD(fprintf(stderr, "get_filehash: returning hash: 0x%016jx\n", (uintmax_t)*hash)); return hash; } static inline void registerfile(filetree_t * restrict * const restrict nodeptr, const enum tree_direction d, file_t * const restrict file) { filetree_t * restrict branch; if (nodeptr == NULL || file == NULL || (d != NONE && *nodeptr == NULL)) nullptr("registerfile()"); LOUD(fprintf(stderr, "registerfile(direction %d)\n", d)); /* Allocate and initialize a new node for the file */ branch = (filetree_t *)string_malloc(sizeof(filetree_t)); if (branch == NULL) oom("registerfile() branch"); branch->file = file; branch->left = NULL; branch->right = NULL; /* Attach the new node to the requested branch */ switch (d) { case LEFT: (*nodeptr)->left = branch; break; case RIGHT: (*nodeptr)->right = branch; break; case NONE: /* For the root of the tree only */ *nodeptr = branch; break; default: /* This should never ever happen */ fprintf(stderr, "\ninternal error: invalid direction for registerfile(), report this\n"); string_malloc_destroy(); exit(EXIT_FAILURE); break; } return; } #ifdef TREE_DEPTH_STATS #define TREE_DEPTH_UPDATE_MAX() { if (max_depth < tree_depth) max_depth = tree_depth; tree_depth = 0; } #else #define TREE_DEPTH_UPDATE_MAX() #endif /* Check two files for a match */ static file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict file) { int cmpresult = 0; int cantmatch = 0; const jdupes_hash_t * restrict filehash; if (tree == NULL || file == NULL || tree->file == NULL || tree->file->d_name == NULL || file->d_name == NULL) nullptr("checkmatch()"); LOUD(fprintf(stderr, "checkmatch ('%s', '%s')\n", tree->file->d_name, file->d_name)); /* If device and inode fields are equal one of the files is a * hard link to the other or the files have been listed twice * unintentionally. We don't want to flag these files as * duplicates unless the user specifies otherwise. */ /* Count the total number of comparisons requested */ DBG(comparisons++;) /* If considering hard linked files as duplicates, they are * automatically duplicates without being read further since * they point to the exact same inode. If we aren't considering * hard links as duplicates, we just return NULL. 
*/ cmpresult = check_conditions(tree->file, file); switch (cmpresult) { case 2: return &tree->file; /* linked files + -H switch */ case -2: return NULL; /* linked files, no -H switch */ case -3: /* user order */ case -4: /* one filesystem */ case -5: /* permissions */ cantmatch = 1; cmpresult = 0; break; default: break; } /* Print pre-check (early) match candidates if requested */ if (ISFLAG(p_flags, PF_EARLYMATCH)) printf("Early match check passed:\n %s\n %s\n\n", file->d_name, tree->file->d_name); /* If preliminary matching succeeded, do main file data checks */ if (cmpresult == 0) { LOUD(fprintf(stderr, "checkmatch: starting file data comparisons\n")); /* Attempt to exclude files quickly with partial file hashing */ if (!ISFLAG(tree->file->flags, FF_HASH_PARTIAL)) { filehash = get_filehash(tree->file, PARTIAL_HASH_SIZE); if (filehash == NULL) return NULL; tree->file->filehash_partial = *filehash; SETFLAG(tree->file->flags, FF_HASH_PARTIAL); } if (!ISFLAG(file->flags, FF_HASH_PARTIAL)) { filehash = get_filehash(file, PARTIAL_HASH_SIZE); if (filehash == NULL) return NULL; file->filehash_partial = *filehash; SETFLAG(file->flags, FF_HASH_PARTIAL); } cmpresult = HASH_COMPARE(file->filehash_partial, tree->file->filehash_partial); LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: partial hashes match\n")); LOUD(if (cmpresult) fprintf(stderr, "checkmatch: partial hashes do not match\n")); DBG(partial_hash++;) /* Print partial hash matching pairs if requested */ if (cmpresult == 0 && ISFLAG(p_flags, PF_PARTIAL)) printf("Partial hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name); if (file->size <= PARTIAL_HASH_SIZE || ISFLAG(flags, F_PARTIALONLY)) { if (ISFLAG(flags, F_PARTIALONLY)) { LOUD(fprintf(stderr, "checkmatch: partial only mode: treating partial hash as full hash\n")); } else { LOUD(fprintf(stderr, "checkmatch: small file: copying partial hash to full hash\n")); } /* filehash_partial = filehash if file is small enough */ if (!ISFLAG(file->flags, FF_HASH_FULL)) { file->filehash = file->filehash_partial; SETFLAG(file->flags, FF_HASH_FULL); DBG(small_file++;) } if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) { tree->file->filehash = tree->file->filehash_partial; SETFLAG(tree->file->flags, FF_HASH_FULL); DBG(small_file++;) } } else if (cmpresult == 0) { if (ISFLAG(flags, F_SKIPHASH)) { /* Skip full file hashing if requested by the user */ LOUD(fprintf(stderr, "checkmatch: skipping full file hashes (F_SKIPHASH)\n")); } else { /* If partial match was correct, perform a full file hash match */ if (!ISFLAG(tree->file->flags, FF_HASH_FULL)) { filehash = get_filehash(tree->file, 0); if (filehash == NULL) return NULL; tree->file->filehash = *filehash; SETFLAG(tree->file->flags, FF_HASH_FULL); } if (!ISFLAG(file->flags, FF_HASH_FULL)) { filehash = get_filehash(file, 0); if (filehash == NULL) return NULL; file->filehash = *filehash; SETFLAG(file->flags, FF_HASH_FULL); } /* Full file hash comparison */ cmpresult = HASH_COMPARE(file->filehash, tree->file->filehash); LOUD(if (!cmpresult) fprintf(stderr, "checkmatch: full hashes match\n")); LOUD(if (cmpresult) fprintf(stderr, "checkmatch: full hashes do not match\n")); DBG(full_hash++); } } else { DBG(partial_elim++); } } /* if (cmpresult == 0) */ if ((cantmatch != 0) && (cmpresult == 0)) { LOUD(fprintf(stderr, "checkmatch: rejecting because match not allowed (cantmatch = 1)\n")); cmpresult = -1; } /* How the file tree works * * The tree is sorted by size as files arrive. If the files are the same * size, they are possible duplicates and are checked for duplication. * If they are not a match, the hashes are used to decide whether to * continue with the file to the left or the right in the file tree. * If the direction decision points to a leaf node, the duplicate scan * continues down that path; if it points to an empty node, the current * file is attached to the file tree at that point. * * This allows for quickly finding files of the same size by avoiding * tree branches with differing size groups. */ if (cmpresult < 0) { if (tree->left != NULL) { LOUD(fprintf(stderr, "checkmatch: recursing tree: left\n")); DBG(left_branch++; tree_depth++;) return checkmatch(tree->left, file); } else { LOUD(fprintf(stderr, "checkmatch: registering file: left\n")); registerfile(&tree, LEFT, file); TREE_DEPTH_UPDATE_MAX(); return NULL; } } else if (cmpresult > 0) { if (tree->right != NULL) { LOUD(fprintf(stderr, "checkmatch: recursing tree: right\n")); DBG(right_branch++; tree_depth++;) return checkmatch(tree->right, file); } else { LOUD(fprintf(stderr, "checkmatch: registering file: right\n")); registerfile(&tree, RIGHT, file); TREE_DEPTH_UPDATE_MAX(); return NULL; } } else { /* All compares matched */ DBG(partial_to_full++;) TREE_DEPTH_UPDATE_MAX(); LOUD(fprintf(stderr, "checkmatch: files appear to match based on hashes\n")); if (ISFLAG(p_flags, PF_FULLHASH)) printf("Full hashes match:\n %s\n %s\n\n", file->d_name, tree->file->d_name); return &tree->file; } /* Fall through - should never be reached */ return NULL; }
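/* Illustration only (never compiled): a sketch of the ordering contract the
 * tree walk above depends on. This is not the program's actual comparator
 * (that logic is split across check_conditions() and HASH_COMPARE()); it just
 * shows the size-first, hash-second three-way compare that makes equal-size
 * candidates cluster on one path while other sizes branch away. */
#if 0
#include <stdio.h>
#include <stdint.h>

static int demo_compare(int64_t size_a, uint64_t hash_a,
                        int64_t size_b, uint64_t hash_b)
{
  if (size_a != size_b) return (size_a > size_b) ? 1 : -1; /* size first */
  if (hash_a != hash_b) return (hash_a > hash_b) ? 1 : -1; /* then hashes */
  return 0; /* candidate duplicate: confirm byte-for-byte */
}

int main(void)
{
  printf("%d\n", demo_compare(100, 7, 100, 7)); /* 0: possible duplicate */
  printf("%d\n", demo_compare(100, 7, 200, 7)); /* -1: branch left */
  return 0;
}
#endif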
/* Do a byte-by-byte comparison in case two different files produce the same signature. Unlikely, but better safe than sorry. */ static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, const off_t size) { static char *c1 = NULL, *c2 = NULL; size_t r1, r2; off_t bytes = 0; int check = 0; if (file1 == NULL || file2 == NULL) nullptr("confirmmatch()"); LOUD(fprintf(stderr, "confirmmatch running\n")); /* Allocate on first use; OOM if either is ever NULLed */ if (!c1) { c1 = (char *)string_malloc(auto_chunk_size); c2 = (char *)string_malloc(auto_chunk_size); } if (!c1 || !c2) oom("confirmmatch() c1/c2"); fseek(file1, 0, SEEK_SET); fseek(file2, 0, SEEK_SET); do { if (interrupt) return 0; r1 = fread(c1, sizeof(char), auto_chunk_size, file1); r2 = fread(c2, sizeof(char), auto_chunk_size, file2); if (r1 != r2) return 0; /* file lengths are different */ if (memcmp (c1, c2, r1)) return 0; /* file contents are different */ if (!ISFLAG(flags, F_HIDEPROGRESS)) { check++; bytes += (off_t)r1; if (check > CHECK_MINIMUM) { update_progress("confirm", (int)((bytes * 100) / size)); check = 0; } } } while (r2); return 1; } /* Count the following statistics: - Maximum number of files in a duplicate set (length of longest dupe chain) - Number of non-zero-length files that have duplicates (if n_files != NULL) - Total number of duplicate file sets (groups) */ extern unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max, unsigned int * const restrict n_files) { unsigned int groups = 0; if (files == NULL || max == NULL) nullptr("get_max_dupes()"); LOUD(fprintf(stderr, "get_max_dupes(%p, %p, %p)\n", (const void *)files, (void *)max, (void *)n_files)); *max = 0; if (n_files) *n_files = 0; while (files) { unsigned int n_dupes; if (ISFLAG(files->flags, FF_HAS_DUPES)) { groups++; if (n_files && files->size) (*n_files)++; n_dupes = 1; for (file_t *curdupe = files->duplicates; curdupe; curdupe = curdupe->duplicates) n_dupes++; if
(n_dupes > *max) *max = n_dupes; } files = files->next; } return groups; } #ifndef NO_USER_ORDER static int sort_pairs_by_param_order(file_t *f1, file_t *f2) { if (!ISFLAG(flags, F_USEPARAMORDER)) return 0; if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_param_order()"); if (f1->user_order < f2->user_order) return -sort_direction; if (f1->user_order > f2->user_order) return sort_direction; return 0; } #endif static int sort_pairs_by_mtime(file_t *f1, file_t *f2) { if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_mtime()"); #ifndef NO_USER_ORDER int po = sort_pairs_by_param_order(f1, f2); if (po != 0) return po; #endif /* NO_USER_ORDER */ if (f1->mtime < f2->mtime) return -sort_direction; else if (f1->mtime > f2->mtime) return sort_direction; return 0; } static int sort_pairs_by_filename(file_t *f1, file_t *f2) { if (f1 == NULL || f2 == NULL) nullptr("sort_pairs_by_filename()"); #ifndef NO_USER_ORDER int po = sort_pairs_by_param_order(f1, f2); if (po != 0) return po; #endif /* NO_USER_ORDER */ return numeric_sort(f1->d_name, f2->d_name, sort_direction); } static void registerpair(file_t **matchlist, file_t *newmatch, int (*comparef)(file_t *f1, file_t *f2)) { file_t *traverse; file_t *back; /* NULL pointer sanity checks */ if (matchlist == NULL || newmatch == NULL || comparef == NULL) nullptr("registerpair()"); LOUD(fprintf(stderr, "registerpair: '%s', '%s'\n", (*matchlist)->d_name, newmatch->d_name);) SETFLAG((*matchlist)->flags, FF_HAS_DUPES); back = NULL; traverse = *matchlist; /* FIXME: This needs to be changed! As it currently stands, the compare * function only runs on a pair as it is registered and future pairs can * mess up the sort order. A separate sorting function should happen before * the dupe chain is acted upon rather than while pairs are registered. 
*/ while (traverse) { if (comparef(newmatch, traverse) <= 0) { newmatch->duplicates = traverse; if (!back) { *matchlist = newmatch; /* update pointer to head of list */ SETFLAG(newmatch->flags, FF_HAS_DUPES); CLEARFLAG(traverse->flags, FF_HAS_DUPES); /* flag is only for first file in dupe chain */ } else back->duplicates = newmatch; break; } else { if (traverse->duplicates == 0) { traverse->duplicates = newmatch; if (!back) SETFLAG(traverse->flags, FF_HAS_DUPES); break; } } back = traverse; traverse = traverse->duplicates; } return; } static inline void help_text(void) { printf("Usage: jdupes [options] FILES and/or DIRECTORIES...\n\n"); printf("Duplicate file sets will be printed by default unless a different action\n"); printf("option is specified (delete, summarize, link, dedupe, etc.)\n"); #ifdef LOUD printf(" -@ --loud \toutput annoying low-level debug info while running\n"); #endif printf(" -0 --printnull \toutput nulls instead of CR/LF (like 'find -print0')\n"); printf(" -1 --one-file-system \tdo not match files on different filesystems/devices\n"); printf(" -A --nohidden \texclude hidden files from consideration\n"); #ifdef ENABLE_DEDUPE printf(" -B --dedupe \tdo a copy-on-write (reflink/clone) deduplication\n"); #endif printf(" -C --chunksize=# \toverride I/O chunk size (min %d, max %d)\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); printf(" -d --delete \tprompt user for files to preserve and delete all\n"); printf(" \tothers; important: under particular circumstances,\n"); printf(" \tdata may be lost when using this option together\n"); printf(" \twith -s or --symlinks, or when specifying a\n"); printf(" \tparticular directory more than once; refer to the\n"); printf(" \tdocumentation for additional information\n"); #ifdef DEBUG printf(" -D --debug \toutput debug statistics after completion\n"); #endif printf(" -f --omitfirst \tomit the first file in each set of matches\n"); printf(" -h --help \tdisplay this help message\n"); #ifndef NO_HARDLINKS printf(" -H --hardlinks \ttreat any linked files as duplicate files. 
Normally\n"); printf(" \tlinked files are treated as non-duplicates for safety\n"); #endif printf(" -i --reverse \treverse (invert) the match sort order\n"); #ifndef NO_USER_ORDER printf(" -I --isolate \tfiles in the same specified directory won't match\n"); #endif printf(" -j --json \tproduce JSON (machine-readable) output\n"); printf(" -K --skiphash \tskip full file hashing (may be faster; 100%% safe)\n"); printf(" \tWARNING: in development, not fully working yet!\n"); #ifndef NO_SYMLINKS printf(" -l --linksoft \tmake relative symlinks for duplicates w/o prompting\n"); #endif #ifndef NO_HARDLINKS printf(" -L --linkhard \thard link all duplicate files without prompting\n"); #ifdef ON_WINDOWS printf(" \tWindows allows a maximum of 1023 hard links per file;\n"); printf(" \tlinking large match sets will result in multiple sets\n"); printf(" \tof hard linked files due to this limit.\n"); #endif /* ON_WINDOWS */ #endif /* NO_HARDLINKS */ printf(" -m --summarize \tsummarize dupe information\n"); printf(" -M --printwithsummary\twill print matches and --summarize at the end\n"); printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n"); printf(" \teach set of duplicates and delete the rest without\n"); printf(" \tprompting the user\n"); printf(" -o --order=BY \tselect sort order for output, linking and deleting; by\n"); printf(" \tmtime (BY=time) or filename (BY=name, the default)\n"); #ifndef NO_USER_ORDER printf(" -O --paramorder \tParameter order is more important than selected -o sort\n"); #endif #ifndef NO_PERMS printf(" -p --permissions \tdon't consider files with different owner/group or\n"); printf(" \tpermission bits as duplicates\n"); #endif printf(" -P --print=type \tprint extra info (partial, early, fullhash)\n"); printf(" -q --quiet \thide progress indicator\n"); printf(" -Q --quick \tskip byte-for-byte confirmation for quick matching\n"); printf(" \tWARNING: -Q can result in data loss! Be very careful!\n"); printf(" -r --recurse \tfor every directory, process its subdirectories too\n"); printf(" -R --recurse: \tfor each directory given after this option follow\n"); printf(" \tsubdirectories encountered within (note the ':' at\n"); printf(" \tthe end of the option, manpage for more details)\n"); #ifndef NO_SYMLINKS printf(" -s --symlinks \tfollow symlinks\n"); #endif printf(" -S --size \tshow size of duplicate files\n"); printf(" -t --nochangecheck\tdisable security check for file changes (aka TOCTTOU)\n"); printf(" -T --partial-only \tmatch based on partial hashes only. WARNING:\n"); printf(" \tEXTREMELY DANGEROUS paired with destructive actions!\n"); printf(" \t-T must be specified twice to work. Read the manual!\n"); printf(" -u --printunique \tprint only a list of unique (non-matched) files\n"); printf(" -v --version \tdisplay jdupes version and license information\n"); printf(" -x --xsize=SIZE \texclude files of size < SIZE bytes from consideration\n"); printf(" --xsize=+SIZE \t'+' specified before SIZE, exclude size > SIZE\n"); printf(" -X --extfilter=x:y\tfilter files based on specified criteria\n"); printf(" \tUse '-X help' for detailed extfilter help\n"); printf(" -z --zeromatch \tconsider zero-length files to be duplicates\n"); printf(" -Z --softabort \tIf the user aborts (i.e. 
CTRL-C) act on matches so far\n"); #ifndef ON_WINDOWS printf(" \tYou can send SIGUSR1 to the program to toggle this\n"); #endif #ifdef OMIT_GETOPT_LONG printf("Note: Long options are not supported in this build.\n\n"); #endif } static void help_text_extfilter(void) { printf("Detailed help for jdupes -X/--extfilter options\n"); printf("General format: jdupes -X filter[:value][size_suffix]\n\n"); printf("noext:ext1[,ext2,...] \tExclude files with certain extension(s)\n\n"); printf("onlyext:ext1[,ext2,...] \tOnly include files with certain extension(s)\n\n"); printf("size[+-=]:size[suffix] \tExclude files meeting certain size criteria\n"); printf(" \tSize specs: + larger, - smaller, = equal to\n"); printf(" \tSpecs can be mixed, i.e. size+=:100k will\n"); printf(" \texclude files 100KiB or larger in size.\n\n"); printf("nostr:text_string \tExclude all paths containing the string\n"); printf("onlystr:text_string \tOnly allow paths containing the string\n"); printf(" \tHINT: you can use these for directories:\n"); printf(" \t-X nostr:/dir_x/ or -X onlystr:/dir_x/\n"); printf("newer:datetime \tReject files newer than the specified date\n"); printf("older:datetime \tReject files older than the specified date\n"); printf(" \tDate/time format: \"YYYY-MM-DD HH:MM:SS\"\n"); printf(" \tTime is optional (remember to escape spaces!)\n"); // printf("\t\n"); printf("\nSome filters take no value or multiple values. Filters that can take\n"); printf("a numeric option generally support the size multipliers K/M/G/T/P/E\n"); printf("with or without an added iB or B. Multipliers are binary-style unless\n"); printf("the B is used, which will use decimal multipliers. For example,\n"); printf("10k or 10kib = 10240; 10kb = 10000. Multipliers are case-insensitive.\n\n"); printf("Filters have cumulative effects: jdupes -X size+:100 -X size-:100 will\n"); printf("cause only files of exactly 100 bytes in size to be included.\n"); }
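/* Illustration only: a few -X invocations assembled from the help text above
 * (all paths here are hypothetical):
 *   jdupes -r -X size-:100k ~/photos     exclude files smaller than 100 KiB
 *   jdupes -r -X noext:bak,tmp ~/src     ignore .bak and .tmp files
 *   jdupes -r -X onlystr:/camera/ ~/pix  only consider paths containing the string
 *   jdupes -r -X newer:2020-01-01 ~/doc  reject files newer than the date */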
0, 'X' }, { "zeromatch", 0, 0, 'z' }, { "softabort", 0, 0, 'Z' }, { NULL, 0, 0, 0 } }; #define GETOPT getopt_long #else #define GETOPT getopt #endif /* Windows buffers our stderr output; don't let it do that */ #ifdef ON_WINDOWS if (setvbuf(stderr, NULL, _IONBF, 0) != 0) fprintf(stderr, "warning: setvbuf() failed\n"); #endif #ifdef UNICODE /* Create a UTF-8 **argv from the wide version */ static char **argv; argv = (char **)string_malloc(sizeof(char *) * (size_t)argc); if (!argv) oom("main() unicode argv"); widearg_to_argv(argc, wargv, argv); /* fix up __argv so getopt etc. don't crash */ __argv = argv; /* Only use UTF-16 for terminal output, else use UTF-8 */ if (!_isatty(_fileno(stdout))) out_mode = _O_BINARY; else out_mode = _O_U16TEXT; if (!_isatty(_fileno(stderr))) err_mode = _O_BINARY; else err_mode = _O_U16TEXT; #endif /* UNICODE */ #ifndef ON_WINDOWS /* Auto-tune chunk size to be half of L1 data cache if possible */ get_proc_cacheinfo(&pci); if (pci.l1 != 0) auto_chunk_size = (pci.l1 / 2); else if (pci.l1d != 0) auto_chunk_size = (pci.l1d / 2); /* Must be at least 4096 (4 KiB) and cannot exceed CHUNK_SIZE */ if (auto_chunk_size < MIN_CHUNK_SIZE || auto_chunk_size > MAX_CHUNK_SIZE) auto_chunk_size = CHUNK_SIZE; /* Force to a multiple of 4096 if it isn't already */ if ((auto_chunk_size & 0x00000fffUL) != 0) auto_chunk_size = (auto_chunk_size + 0x00000fffUL) & 0x000ff000; #endif /* ON_WINDOWS */ /* Is stderr a terminal? If not, we won't write progress to it */ #ifdef ON_WINDOWS if (!_isatty(_fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS); #else if (!isatty(fileno(stderr))) SETFLAG(flags, F_HIDEPROGRESS); #endif program_name = argv[0]; oldargv = cloneargs(argc, argv); while ((opt = GETOPT(argc, argv, "@01ABC:DdfHhIijKlLmMnNOPp:QqRrSsTtuVvZzo:x:X:" #ifndef OMIT_GETOPT_LONG , long_options, NULL #endif )) != EOF) { if ((uintptr_t)optarg == 0x20) goto error_optarg; switch (opt) { case '0': SETFLAG(flags, F_PRINTNULL); break; case '1': SETFLAG(flags, F_ONEFS); break; case 'A': SETFLAG(flags, F_EXCLUDEHIDDEN); break; case 'C': manual_chunk_size = strtol(optarg, NULL, 10) & 0x0ffff000L; /* Align to 4K sizes */ if (manual_chunk_size < MIN_CHUNK_SIZE || manual_chunk_size > MAX_CHUNK_SIZE) { fprintf(stderr, "warning: invalid manual chunk size (must be %d-%d); using defaults\n", MIN_CHUNK_SIZE, MAX_CHUNK_SIZE); LOUD(fprintf(stderr, "Manual chunk size (failed) was apparently '%s' => %ld\n", optarg, manual_chunk_size)); manual_chunk_size = 0; } else auto_chunk_size = (size_t)manual_chunk_size; LOUD(fprintf(stderr, "Manual chunk size is %ld\n", manual_chunk_size)); break; case 'd': SETFLAG(flags, F_DELETEFILES); break; case 'D': #ifdef DEBUG SETFLAG(flags, F_DEBUG); #endif break; case 'f': SETFLAG(flags, F_OMITFIRST); break; case 'h': help_text(); string_malloc_destroy(); exit(EXIT_FAILURE); #ifndef NO_HARDLINKS case 'H': SETFLAG(flags, F_CONSIDERHARDLINKS); break; case 'L': SETFLAG(flags, F_HARDLINKFILES); break; #endif case 'i': SETFLAG(flags, F_REVERSESORT); break; #ifndef NO_USER_ORDER case 'I': SETFLAG(flags, F_ISOLATE); break; case 'O': SETFLAG(flags, F_USEPARAMORDER); break; #else case 'I': case 'O': fprintf(stderr, "warning: -I and -O are disabled and ignored in this build\n"); break; #endif case 'j': SETFLAG(flags, F_PRINTJSON); break; case 'K': SETFLAG(flags, F_SKIPHASH); break; case 'm': SETFLAG(flags, F_SUMMARIZEMATCHES); break; case 'M': SETFLAG(flags, F_SUMMARIZEMATCHES); SETFLAG(flags, F_PRINTMATCHES); break; case 'n': //fprintf(stderr, "note: -n/--noempty is the default behavior now 
and is deprecated.\n"); break; case 'N': SETFLAG(flags, F_NOPROMPT); break; case 'p': SETFLAG(flags, F_PERMISSIONS); break; case 'P': if (strcmp(optarg, "partial") == 0) SETFLAG(p_flags, PF_PARTIAL); else if (strcmp(optarg, "early") == 0) SETFLAG(p_flags, PF_EARLYMATCH); else if (strcmp(optarg, "fullhash") == 0) SETFLAG(p_flags, PF_FULLHASH); else { fprintf(stderr, "Option '%s' is not valid for -P\n", optarg); exit(EXIT_FAILURE); } break; case 'q': SETFLAG(flags, F_HIDEPROGRESS); break; case 'Q': SETFLAG(flags, F_QUICKCOMPARE); break; case 'r': SETFLAG(flags, F_RECURSE); break; case 'R': SETFLAG(flags, F_RECURSEAFTER); break; case 't': SETFLAG(flags, F_NOCHANGECHECK); break; case 'T': if (partialonly_spec == 0) partialonly_spec = 1; else { partialonly_spec = 2; SETFLAG(flags, F_PARTIALONLY); } break; case 'u': SETFLAG(flags, F_PRINTUNIQUE); break; #ifndef NO_SYMLINKS case 'l': SETFLAG(flags, F_MAKESYMLINKS); break; case 's': SETFLAG(flags, F_FOLLOWLINKS); break; #endif case 'S': SETFLAG(flags, F_SHOWSIZE); break; case 'x': fprintf(stderr, "-x/--xsize is deprecated; use -X size[+-=]:size[suffix] instead\n"); xs = string_malloc(8 + strlen(optarg)); if (xs == NULL) oom("xsize temp string"); strcpy(xs, "size"); if (*optarg == '+') { strcat(xs, "+:"); optarg++; } else { strcat(xs, "-=:"); } strcat(xs, optarg); add_extfilter(xs); string_free(xs); break; case 'X': add_extfilter(optarg); break; case 'z': SETFLAG(flags, F_INCLUDEEMPTY); break; case 'Z': SETFLAG(flags, F_SOFTABORT); break; case '@': #ifdef LOUD_DEBUG SETFLAG(flags, F_DEBUG | F_LOUD | F_HIDEPROGRESS); #endif break; case 'v': case 'V': printf("jdupes %s (%s) ", VER, VERDATE); /* Indicate bitness information */ if (sizeof(uintptr_t) == 8) { if (sizeof(long) == 4) printf("64-bit i32\n"); else if (sizeof(long) == 8) printf("64-bit\n"); } else if (sizeof(uintptr_t) == 4) { if (sizeof(long) == 4) printf("32-bit\n"); else if (sizeof(long) == 8) printf("32-bit i64\n"); } else printf("%u-bit i%u\n", (unsigned int)(sizeof(uintptr_t) * 8), (unsigned int)(sizeof(long) * 8)); #ifdef BUILD_DATE #include "build_date.h" printf("Built on %s\n", BUILT_ON_DATE); #endif printf("Compile-time extensions:"); if (*extensions != NULL) { int c = 0; while (extensions[c] != NULL) { printf(" %s", extensions[c]); c++; } } else printf(" none"); printf("\nCopyright (C) 2015-2020 by Jody Bruchon and contributors\n"); printf("Forked from fdupes 1.51, (C) 1999-2014 Adrian Lopez and contributors\n\n"); printf("Permission is hereby granted, free of charge, to any person obtaining a copy of\n"); printf("this software and associated documentation files (the \"Software\"), to deal in\n"); printf("the Software without restriction, including without limitation the rights to\n"); printf("use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies\n"); printf("of the Software, and to permit persons to whom the Software is furnished to do\n"); printf("so, subject to the following conditions:\n\n"); printf("The above copyright notice and this permission notice shall be included in all\n"); printf("copies or substantial portions of the Software.\n\n"); printf("THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"); printf("IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"); printf("FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n"); printf("AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"); printf("LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"); printf("OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"); printf("SOFTWARE.\n"); printf("\nIf you find this software useful, please consider financially supporting\n"); printf("its continued development by donating to the author's SubscribeStar:\n"); printf(" https://SubscribeStar.com/JodyBruchon\n"); printf("\nNew releases, bug fixes, and more at the jdupes GitHub project page:\n"); printf(" https://github.com/jbruchon/jdupes\n"); exit(EXIT_SUCCESS); case 'o': if (!strncasecmp("name", optarg, 5)) { ordertype = ORDER_NAME; } else if (!strncasecmp("time", optarg, 5)) { ordertype = ORDER_TIME; } else { fprintf(stderr, "invalid value for --order: '%s'\n", optarg); exit(EXIT_FAILURE); } break; case 'B': #ifdef ENABLE_DEDUPE /* Refuse to dedupe on 2.x kernels; they could damage user data */ if (uname(&utsname)) { fprintf(stderr, "Failed to get kernel version! Aborting.\n"); exit(EXIT_FAILURE); } LOUD(fprintf(stderr, "dedupefiles: uname got release '%s'\n", utsname.release)); if (*(utsname.release) == '2' && *(utsname.release + 1) == '.') { fprintf(stderr, "Refusing to dedupe on a 2.x kernel; data loss could occur. Aborting.\n"); exit(EXIT_FAILURE); } SETFLAG(flags, F_DEDUPEFILES); /* btrfs will do the byte-for-byte check itself */ SETFLAG(flags, F_QUICKCOMPARE); /* It is completely useless to dedupe zero-length extents */ CLEARFLAG(flags, F_INCLUDEEMPTY); #else fprintf(stderr, "This program was built without btrfs support\n"); exit(EXIT_FAILURE); #endif break; default: if (opt != '?') fprintf(stderr, "Sorry, using '-%c' is not supported in this build.\n", opt); fprintf(stderr, "Try `jdupes --help' for more information.\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } } if (optind >= argc) { fprintf(stderr, "no files or directories specified (use -h option for help)\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } if (partialonly_spec == 1) { fprintf(stderr, "--partial-only specified only once (it's VERY DANGEROUS, read the manual!)\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_PARTIALONLY) && ISFLAG(flags, F_QUICKCOMPARE)) { fprintf(stderr, "--partial-only overrides --quick and is even more dangerous (read the manual!)\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) { fprintf(stderr, "options --recurse and --recurse: are not compatible\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { fprintf(stderr, "options --summarize and --delete are not compatible\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } #ifdef ENABLE_DEDUPE if (ISFLAG(flags, F_CONSIDERHARDLINKS) && ISFLAG(flags, F_DEDUPEFILES)) fprintf(stderr, "warning: option --dedupe overrides the behavior of --hardlinks\n"); #endif /* If pm == 0, call printmatches() */ pm = !!ISFLAG(flags, F_SUMMARIZEMATCHES) + !!ISFLAG(flags, F_DELETEFILES) + !!ISFLAG(flags, F_HARDLINKFILES) + !!ISFLAG(flags, F_MAKESYMLINKS) + !!ISFLAG(flags, F_PRINTJSON) + !!ISFLAG(flags, F_PRINTUNIQUE) + !!ISFLAG(flags, F_DEDUPEFILES); if (pm > 1) { fprintf(stderr, "Only one of --summarize, --printwithsummary, --delete, --linkhard,\n--linksoft, --json, or --dedupe may be used\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } if (pm == 0) SETFLAG(flags, F_PRINTMATCHES);
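/* Illustration only (never compiled): the pm counter above relies on the C
 * idiom that !!x normalizes any nonzero value to exactly 1, so a sum of
 * !!flag tests counts how many mutually exclusive action modes were set. */
#if 0
#include <stdio.h>

int main(void)
{
  unsigned int demoflags = 0x14; /* two example bits set */
  int count = !!(demoflags & 0x04) + !!(demoflags & 0x10) + !!(demoflags & 0x40);
  printf("%d actions selected\n", count); /* prints 2 */
  return 0;
}
#endif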
if (ISFLAG(flags, F_RECURSEAFTER)) { firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv); if (firstrecurse == argc) firstrecurse = nonoptafter("-R", argc, oldargv, argv); if (firstrecurse == argc) { fprintf(stderr, "-R option must be isolated from other options\n"); string_malloc_destroy(); exit(EXIT_FAILURE); } /* F_RECURSE is not set for directories before --recurse: */ for (int x = optind; x < firstrecurse; x++) { slash_convert(argv[x]); grokdir(argv[x], &files, 0); user_item_count++; } /* Set F_RECURSE for directories after --recurse: */ SETFLAG(flags, F_RECURSE); for (int x = firstrecurse; x < argc; x++) { slash_convert(argv[x]); grokdir(argv[x], &files, 1); user_item_count++; } } else { for (int x = optind; x < argc; x++) { slash_convert(argv[x]); grokdir(argv[x], &files, ISFLAG(flags, F_RECURSE)); user_item_count++; } } /* We don't need the double traversal check tree anymore */ travdone_free(travdone_head); if (ISFLAG(flags, F_REVERSESORT)) sort_direction = -1; if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\n"); if (!files) { fwprint(stderr, "No duplicates found.", 1); string_malloc_destroy(); exit(EXIT_SUCCESS); } curfile = files; progress = 0; /* Catch CTRL-C */ signal(SIGINT, sighandler); #ifndef ON_WINDOWS /* Catch SIGUSR1 and use it to enable -Z */ signal(SIGUSR1, sigusr1); #endif while (curfile) { static file_t **match = NULL; static FILE *file1; static FILE *file2; if (interrupt) { fprintf(stderr, "\nStopping file scan due to user abort\n"); if (!ISFLAG(flags, F_SOFTABORT)) exit(EXIT_FAILURE); interrupt = 0; /* reset interrupt for re-use */ goto skip_file_scan; } LOUD(fprintf(stderr, "\nMAIN: current file: %s\n", curfile->d_name)); if (!checktree) registerfile(&checktree, NONE, curfile); else match = checkmatch(checktree, curfile); /* Byte-for-byte check that a matched pair are actually matched */ if (match != NULL) { /* Quick or partial-only compare will never run confirmmatch() * Also skip match confirmation for hard-linked files * (This set of comparisons is ugly, but quite efficient) */ if (ISFLAG(flags, F_QUICKCOMPARE) || ISFLAG(flags, F_PARTIALONLY) || (ISFLAG(flags, F_CONSIDERHARDLINKS) && (curfile->inode == (*match)->inode) && (curfile->device == (*match)->device)) ) { LOUD(fprintf(stderr, "MAIN: notice: hard linked, quick, or partial-only match (-H/-Q/-T)\n")); registerpair(match, curfile, (ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename); dupecount++; goto skip_full_check; } #ifdef UNICODE if (!M2W(curfile->d_name, wstr)) file1 = NULL; else file1 = _wfopen(wstr, FILE_MODE_RO); #else file1 = fopen(curfile->d_name, FILE_MODE_RO); #endif if (!file1) { LOUD(fprintf(stderr, "MAIN: warning: file1 fopen() failed ('%s')\n", curfile->d_name)); curfile = curfile->next; continue; } #ifdef UNICODE if (!M2W((*match)->d_name, wstr)) file2 = NULL; else file2 = _wfopen(wstr, FILE_MODE_RO); #else file2 = fopen((*match)->d_name, FILE_MODE_RO); #endif if (!file2) { fclose(file1); LOUD(fprintf(stderr, "MAIN: warning: file2 fopen() failed ('%s')\n", (*match)->d_name)); curfile = curfile->next; continue; } if (confirmmatch(file1, file2, curfile->size)) { LOUD(fprintf(stderr, "MAIN: registering matched file pair\n")); registerpair(match, curfile, (ordertype == ORDER_TIME) ? 
sort_pairs_by_mtime : sort_pairs_by_filename); dupecount++; } DBG(else hash_fail++;) fclose(file1); fclose(file2); } skip_full_check: curfile = curfile->next; if (!ISFLAG(flags, F_HIDEPROGRESS)) update_progress(NULL, -1); progress++; } if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%60s\r", " "); skip_file_scan: /* Stop catching CTRL+C */ signal(SIGINT, SIG_DFL); if (ISFLAG(flags, F_DELETEFILES)) { if (ISFLAG(flags, F_NOPROMPT)) deletefiles(files, 0, 0); else deletefiles(files, 1, stdin); } #ifndef NO_SYMLINKS if (ISFLAG(flags, F_MAKESYMLINKS)) linkfiles(files, 0); #endif #ifndef NO_HARDLINKS if (ISFLAG(flags, F_HARDLINKFILES)) linkfiles(files, 1); #endif /* NO_HARDLINKS */ #ifdef ENABLE_DEDUPE if (ISFLAG(flags, F_DEDUPEFILES)) dedupefiles(files); #endif /* ENABLE_DEDUPE */ if (ISFLAG(flags, F_PRINTMATCHES)) printmatches(files); if (ISFLAG(flags, F_PRINTUNIQUE)) printunique(files); if (ISFLAG(flags, F_PRINTJSON)) printjson(files, argc, argv); if (ISFLAG(flags, F_SUMMARIZEMATCHES)) { if (ISFLAG(flags, F_PRINTMATCHES)) printf("\n\n"); summarizematches(files); } string_malloc_destroy(); #ifdef DEBUG if (ISFLAG(flags, F_DEBUG)) { fprintf(stderr, "\n%d partial (+%d small) -> %d full hash -> %d full (%d partial elim) (%d hash%u fail)\n", partial_hash, small_file, full_hash, partial_to_full, partial_elim, hash_fail, (unsigned int)sizeof(jdupes_hash_t)*8); fprintf(stderr, "%" PRIuMAX " total files, %" PRIuMAX " comparisons, branch L %u, R %u, both %u, max tree depth %u\n", filecount, comparisons, left_branch, right_branch, left_branch + right_branch, max_depth); fprintf(stderr, "SMA: allocs %" PRIuMAX ", free %" PRIuMAX " (merge %" PRIuMAX ", repl %" PRIuMAX "), fail %" PRIuMAX ", reuse %" PRIuMAX ", scan %" PRIuMAX ", tails %" PRIuMAX "\n", sma_allocs, sma_free_good, sma_free_merged, sma_free_replaced, sma_free_ignored, sma_free_reclaimed, sma_free_scanned, sma_free_tails); if (manual_chunk_size > 0) fprintf(stderr, "I/O chunk size: %ld KiB (manually set)\n", manual_chunk_size >> 10); else { #ifndef ON_WINDOWS fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (%s)\n", (uintmax_t)(auto_chunk_size >> 10), (pci.l1 + pci.l1d) != 0 ? 
"dynamically sized" : "default size"); #else fprintf(stderr, "I/O chunk size: %" PRIuMAX " KiB (default size)\n", (uintmax_t)(auto_chunk_size >> 10)); #endif } #ifdef ON_WINDOWS #ifndef NO_HARDLINKS if (ISFLAG(flags, F_HARDLINKFILES)) fprintf(stderr, "Exclusions based on Windows hard link limit: %u\n", hll_exclude); #endif #endif } #endif /* DEBUG */ exit(EXIT_SUCCESS); error_optarg: fprintf(stderr, "error: option '%c' requires an argument\n", opt); exit(EXIT_FAILURE); } jdupes-1.18.1/jdupes.h000066400000000000000000000163101370142704600145440ustar00rootroot00000000000000/* jdupes main program header * See jdupes.c for license information */ #ifndef JDUPES_H #define JDUPES_H #ifdef __cplusplus extern "C" { #endif /* Detect Windows and modify as needed */ #if defined _WIN32 || defined __CYGWIN__ #ifndef ON_WINDOWS #define ON_WINDOWS 1 #endif #define NO_SYMLINKS 1 #define NO_PERMS 1 #define NO_SIGACTION 1 #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #include #include "win_stat.h" #endif /* Win32 */ #include #include #include #include #include "string_malloc.h" #include "jody_sort.h" #include "version.h" #include "xxhash.h" /* Set hash type (change this if swapping in a different hash function) */ typedef XXH64_hash_t jdupes_hash_t; /* Some types are different on Windows */ #ifdef ON_WINDOWS typedef uint64_t jdupes_ino_t; typedef uint32_t jdupes_mode_t; extern const char dir_sep; #ifdef UNICODE extern const wchar_t *FILE_MODE_RO; #else extern const char *FILE_MODE_RO; #endif /* UNICODE */ #else /* Not Windows */ #include typedef ino_t jdupes_ino_t; typedef mode_t jdupes_mode_t; extern const char *FILE_MODE_RO; extern const char dir_sep; #ifdef UNICODE #error Do not define UNICODE on non-Windows platforms. #undef UNICODE #endif #endif /* _WIN32 || __CYGWIN__ */ /* Windows + Unicode compilation */ #ifdef UNICODE #define WPATH_MAX 8192 #define PATHBUF_SIZE WPATH_MAX typedef wchar_t wpath_t[WPATH_MAX]; extern int out_mode; extern int err_mode; #define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, WPATH_MAX) #define W2M(a,b) WideCharToMultiByte(CP_UTF8, 0, a, -1, (LPSTR)b, WPATH_MAX, NULL, NULL) #endif /* UNICODE */ #ifndef NO_SYMLINKS #include "jody_paths.h" #endif #define ISFLAG(a,b) ((a & b) == b) #define SETFLAG(a,b) (a |= b) #define CLEARFLAG(a,b) (a &= (~b)) /* Low memory option overrides */ #ifdef LOW_MEMORY #ifndef NO_PERMS #define NO_PERMS 1 #endif #endif /* Aggressive verbosity for deep debugging */ #ifdef LOUD_DEBUG #ifndef DEBUG #define DEBUG #endif #define LOUD(...) 
if ISFLAG(flags, F_LOUD) __VA_ARGS__ #else #define LOUD(a) #endif /* Compile out debugging stat counters unless requested */ #ifdef DEBUG #define DBG(a) a #ifndef TREE_DEPTH_STATS #define TREE_DEPTH_STATS #endif #else #define DBG(a) #endif /* Behavior modification flags */ extern uint_fast32_t flags; #define F_RECURSE (1U << 0) #define F_HIDEPROGRESS (1U << 1) #define F_SOFTABORT (1U << 2) #define F_FOLLOWLINKS (1U << 3) #define F_DELETEFILES (1U << 4) #define F_INCLUDEEMPTY (1U << 5) #define F_CONSIDERHARDLINKS (1U << 6) #define F_SHOWSIZE (1U << 7) #define F_OMITFIRST (1U << 8) #define F_RECURSEAFTER (1U << 9) #define F_NOPROMPT (1U << 10) #define F_SUMMARIZEMATCHES (1U << 11) #define F_EXCLUDEHIDDEN (1U << 12) #define F_PERMISSIONS (1U << 13) #define F_HARDLINKFILES (1U << 14) #define F_EXCLUDESIZE (1U << 15) #define F_QUICKCOMPARE (1U << 16) #define F_USEPARAMORDER (1U << 17) #define F_DEDUPEFILES (1U << 18) #define F_REVERSESORT (1U << 19) #define F_ISOLATE (1U << 20) #define F_MAKESYMLINKS (1U << 21) #define F_PRINTMATCHES (1U << 22) #define F_ONEFS (1U << 23) #define F_PRINTNULL (1U << 24) #define F_PARTIALONLY (1U << 25) #define F_NOCHANGECHECK (1U << 26) #define F_PRINTJSON (1U << 27) #define F_SKIPHASH (1U << 28) #define F_PRINTUNIQUE (1U << 29) #define F_LOUD (1U << 30) #define F_DEBUG (1U << 31) /* Per-file true/false flags */ #define FF_VALID_STAT (1U << 0) #define FF_HASH_PARTIAL (1U << 1) #define FF_HASH_FULL (1U << 2) #define FF_HAS_DUPES (1U << 3) #define FF_IS_SYMLINK (1U << 4) #define FF_NOT_UNIQUE (1U << 5) /* Extra print flags */ #define PF_PARTIAL (1U << 0) #define PF_EARLYMATCH (1U << 1) #define PF_FULLHASH (1U << 2) typedef enum { ORDER_NAME = 0, ORDER_TIME } ordertype_t; #ifndef PARTIAL_HASH_SIZE #define PARTIAL_HASH_SIZE 4096 #endif /* Maximum path buffer size to use; must be large enough for a path plus * any work that might be done to the array it's stored in. PATH_MAX is * not always true. 
Read this article on the false promises of PATH_MAX: * http://insanecoding.blogspot.com/2007/11/pathmax-simply-isnt.html * Windows + Unicode needs a lot more space than UTF-8 in Linux/Mac OS X */ #ifndef PATHBUF_SIZE #define PATHBUF_SIZE 4096 #endif /* Per-file information */ typedef struct _file { struct _file *duplicates; struct _file *next; char *d_name; dev_t device; jdupes_mode_t mode; off_t size; jdupes_ino_t inode; jdupes_hash_t filehash_partial; jdupes_hash_t filehash; time_t mtime; uint32_t flags; /* Status flags */ #ifndef NO_USER_ORDER unsigned int user_order; /* Order of the originating command-line parameter */ #endif #ifndef NO_HARDLINKS #ifndef ON_WINDOWS nlink_t nlink; #else uint32_t nlink; /* link count on Windows is always a DWORD */ #endif #endif #ifndef NO_PERMS uid_t uid; gid_t gid; #endif } file_t; typedef struct _filetree { file_t *file; struct _filetree *left; struct _filetree *right; } filetree_t; /* This gets used in many functions */ #ifdef ON_WINDOWS extern struct winstat s; #define STAT win_stat #else extern struct stat s; #define STAT stat #endif /* -X extended filter parameter stack */ struct extfilter { struct extfilter *next; unsigned int flags; int64_t size; /* also used for other large integers */ char param[]; }; /* Extended filter parameter flags */ #define XF_EXCL_EXT 0x00000001U #define XF_SIZE_EQ 0x00000002U #define XF_SIZE_GT 0x00000004U #define XF_SIZE_LT 0x00000008U #define XF_ONLY_EXT 0x00000010U #define XF_EXCL_STR 0x00000020U #define XF_ONLY_STR 0x00000040U #define XF_DATE_NEWER 0x00000080U #define XF_DATE_OLDER 0x00000100U /* The X-than-or-equal are combination flags */ #define XF_SIZE_GTEQ 0x00000006U #define XF_SIZE_LTEQ 0x0000000aU /* Flags that use a numeric size with optional suffix */ #define XF_REQ_NUMBER 0x0000000eU /* Flags that require a data parameter (after a colon) */ #define XF_REQ_VALUE 0x0000001fU /* Flags that take a date that needs to be converted to time_t seconds */ #define XF_REQ_DATE 0x00000180U /* Exclude definition array */ struct extfilter_tags { const char * const tag; const uint32_t flags; }; extern const struct extfilter_tags extfilter_tags[]; extern struct extfilter *extfilter_head; /* Suffix definitions (treat as case-insensitive) */ struct size_suffix { const char * const suffix; const int64_t multiplier; }; extern const struct size_suffix size_suffix[]; extern char tempname[PATHBUF_SIZE * 2]; extern const char *extensions[]; extern void oom(const char * const restrict msg); extern void nullptr(const char * restrict func); extern int file_has_changed(file_t * const restrict file); extern int getfilestats(file_t * const restrict file); extern int getdirstats(const char * const restrict name, jdupes_ino_t * const restrict inode, dev_t * const restrict dev, jdupes_mode_t * const restrict mode); extern int check_conditions(const file_t * const restrict file1, const file_t * const restrict file2); extern unsigned int get_max_dupes(const file_t *files, unsigned int * const restrict max, unsigned int * const restrict n_files); #ifdef __cplusplus } #endif #endif /* JDUPES_H */ jdupes-1.18.1/jody_cacheinfo.c000066400000000000000000000052071370142704600162140ustar00rootroot00000000000000/* Detect and report size of CPU caches * * Copyright (C) 2020 by Jody Bruchon * Distributed under The MIT License * * If an error occurs or a cache is missing, zeroes are returned * Unified caches populate l1/l2/l3; split caches populate lXi/lXd instead */ #include #include #include #include "jody_cacheinfo.h" /* None of this code is 
useful on Windows, don't build anything there */ #ifndef ON_WINDOWS static char *pathidx; static char buf[16]; static char path[64] = "/sys/devices/system/cpu/cpu0/cache/index"; /*** End declarations, begin code ***/ /* Linux sysfs */ static size_t read_procfile(const char * const restrict name) { FILE *fp; size_t i; if (name == NULL) return 0; memset(buf, 0, 16); /* Create path */ *pathidx = '\0'; strcpy(pathidx, name); fp = fopen(path, "rb"); if (fp == NULL) return 0; i = fread(buf, 1, 16, fp); if (ferror(fp)) return 0; fclose(fp); return i; } void get_proc_cacheinfo(struct proc_cacheinfo *pci) { char *idx; size_t i; size_t size; int level; char type; char index; if (pci == NULL) return; memset(pci, 0, sizeof(struct proc_cacheinfo)); i = strlen(path); if (i > 48) return; idx = path + i; pathidx = idx + 1; *pathidx = '/'; pathidx++; for (index = '0'; index < '9'; index++) { *idx = index; /* Get the level for this index */ if (read_procfile("level") == 0) break; if (*buf < '1' || *buf > '3') break; else level = (*buf) + 1 - '1'; /* Get the size */ if (read_procfile("size") == 0) break; size = (size_t)atoi(buf) * 1024; if (size == 0) break; /* Get the type */ if (read_procfile("type") == 0) break; if (*buf != 'U' && *buf != 'I' && *buf != 'D') break; type = *buf; /* Act on it */ switch (type) { case 'D': switch (level) { case 1: pci->l1d = size; break; case 2: pci->l2d = size; break; case 3: pci->l3d = size; break; default: return; }; break; case 'I': switch (level) { case 1: pci->l1i = size; break; case 2: pci->l2i = size; break; case 3: pci->l3i = size; break; default: return; }; break; case 'U': switch (level) { case 1: pci->l1 = size; break; case 2: pci->l2 = size; break; case 3: pci->l3 = size; break; default: return; }; break; default: return; } /* Continue to next index */ } return; } #endif /* ON_WINDOWS */ /* This is for testing only */ #if 0 int main(void) { static struct proc_cacheinfo pci; get_proc_cacheinfo(&pci); printf("Cache: L1 %d,%d,%d L2 %d,%d,%d L3 %d,%d,%d\n", pci.l1, pci.l1i, pci.l1d, pci.l2, pci.l2i, pci.l2d, pci.l3, pci.l3i, pci.l3d); return 0; } #endif jdupes-1.18.1/jody_cacheinfo.h000066400000000000000000000012171370142704600162160ustar00rootroot00000000000000/* Detect size of CPU data caches * See jody_cacheinfo.c for license information */ #ifndef JODY_CACHEINFO_H #define JODY_CACHEINFO_H #ifdef __cplusplus extern "C" { #endif /* Don't allow anything on Windows */ #ifndef ON_WINDOWS /* Cache information structure * Split caches populate i/d, unified caches populate non-i/d */ struct proc_cacheinfo { size_t l1; size_t l1i; size_t l1d; size_t l2; size_t l2i; size_t l2d; size_t l3; size_t l3i; size_t l3d; }; extern void get_proc_cacheinfo(struct proc_cacheinfo *pci); #else #define get_proc_cacheinfo(a) #endif /* ON_WINDOWS */ #ifdef __cplusplus } #endif #endif /* JODY_CACHEINFO_H */ jdupes-1.18.1/jody_paths.c000066400000000000000000000105511370142704600154120ustar00rootroot00000000000000/* Jody Bruchon's path manipulation code library * * Copyright (C) 2014-2020 by Jody Bruchon * Released under The MIT License */ #include #include #include #include #include #include "jody_paths.h" /* Collapse dot-dot and single dot path components * This code MUST be passed a full file pathname (starting with '/') */ extern int collapse_dotdot(char * const path) { char *p; /* string copy input */ char *out; /* string copy output */ unsigned int i = 0; /* Fail if not passed an absolute path */ if (*path != '/') return -1; p = path; out = path; while (*p != '\0') { /* Abort if we're 
too close to the end of the buffer */ if (i >= (PATHBUF_SIZE - 3)) return -2; /* Skip repeated slashes */ while (*p == '/' && *(p + 1) == '/') { p++; i++; } /* Scan for '/./', '/..', '/.\0' combinations */ if (*p == '/' && *(p + 1) == '.' && (*(p + 2) == '.' || *(p + 2) == '/' || *(p + 2) == '\0')) { /* Check for '../' or terminal '..' */ if (*(p + 2) == '.' && (*(p + 3) == '/' || *(p + 3) == '\0')) { /* Found a dot-dot; pull everything back to the previous directory */ p += 3; i += 3; /* If already at root, skip over the dot-dot */ if (i == 0) continue; /* Don't seek back past the first character */ if ((uintptr_t)out == (uintptr_t)path) continue; out--; while (*out != '/') out--; if (*p == '\0') break; continue; } else if (*(p + 2) == '/' || *(p + 2) == '\0') { /* Found a single dot; seek input ptr past it */ p += 2; i += 2; if (*p == '\0') break; continue; } /* Fall through: not a dot or dot-dot, just a slash */ } /* Copy all remaining text */ *out = *p; p++; out++; i++; } /* If only a root slash remains, be sure to keep it */ if ((uintptr_t)out == (uintptr_t)path) { *out = '/'; out++; } /* Output must always be terminated properly */ *out = '\0'; return 0; } /* Create a relative symbolic link path for a destination file */ extern int make_relative_link_name(const char * const src, const char * const dest, char * rel_path) { static char p1[PATHBUF_SIZE * 2], p2[PATHBUF_SIZE * 2]; static char *sp, *dp, *ss; if (!src || !dest) goto error_null_param; /* Get working directory path and prefix to pathnames if needed */ if (*src != '/' || *dest != '/') { if (!getcwd(p1, PATHBUF_SIZE * 2)) goto error_getcwd; *(p1 + (PATHBUF_SIZE * 2) - 1) = '\0'; strncat(p1, "/", PATHBUF_SIZE * 2 - 1); strncpy(p2, p1, PATHBUF_SIZE * 2); } /* If an absolute path is provided, use it as-is */ if (*src == '/') *p1 = '\0'; if (*dest == '/') *p2 = '\0'; /* Concatenate working directory to relative paths */ strncat(p1, src, PATHBUF_SIZE); strncat(p2, dest, PATHBUF_SIZE); /* Collapse . and .. path components */ if (collapse_dotdot(p1) != 0) goto error_cdd; if (collapse_dotdot(p2) != 0) goto error_cdd; /* Find where paths differ, remembering each slash along the way */ sp = p1; dp = p2; ss = p1; while (*sp == *dp && *sp != '\0' && *dp != '\0') { if (*sp == '/') ss = sp; sp++; dp++; } /* If paths are 100% identical then the files are the same file */ if (*sp == '\0' && *dp == '\0') return 1; /* Replace dirs in destination path with dot-dot */ while (*dp != '\0') { if (*dp == '/') { *rel_path++ = '.'; *rel_path++ = '.'; *rel_path++ = '/'; } dp++; } /* Copy the file name into rel_path and return */ ss++; while (*ss != '\0') *rel_path++ = *ss++; /* . and .. dirs at end are invalid */ if (*(rel_path - 1) == '.') if (*(rel_path - 2) == '/' || (*(rel_path - 2) == '.' 
&& *(rel_path - 3) == '/')) goto error_dir_end; if (*(rel_path - 1) == '/') goto error_dir_end; *rel_path = '\0'; return 0; error_null_param: fprintf(stderr, "Internal error: get_relative_name has NULL parameter\n"); fprintf(stderr, "Report this as a serious bug to the author\n"); exit(EXIT_FAILURE); error_getcwd: fprintf(stderr, "error: couldn't get the current directory\n"); return -1; error_cdd: fprintf(stderr, "internal error: collapse_dotdot() call failed\n"); return -2; error_dir_end: fprintf(stderr, "internal error: get_relative_name() result has directory at end\n"); return -3; } jdupes-1.18.1/jody_paths.h000066400000000000000000000007111370142704600154140ustar00rootroot00000000000000/* Jody Bruchon's path manipulation code library * See jody_paths.c for license information */ #ifndef JODY_PATHS_H #define JODY_PATHS_H #ifdef __cplusplus extern "C" { #endif #ifndef PATHBUF_SIZE #define PATHBUF_SIZE 4096 #endif extern int collapse_dotdot(char * const path); extern int make_relative_link_name(const char * const src, const char * const dest, char * rel_path); #ifdef __cplusplus } #endif #endif /* JODY_PATHS_H */ jdupes-1.18.1/jody_sort.c000066400000000000000000000050361370142704600152640ustar00rootroot00000000000000/* Jody Bruchon's sorting code library * * Copyright (C) 2014-2020 by Jody Bruchon * Released under The MIT License */ #include #include "jody_sort.h" #define IS_NUM(a) (((a >= '0') && (a <= '9')) ? 1 : 0) extern int numeric_sort(const char * restrict c1, const char * restrict c2, int sort_direction) { int len1 = 0, len2 = 0; int precompare = 0; if (c1 == NULL || c2 == NULL) return -99; /* Numerically correct sort */ while (*c1 != '\0' && *c2 != '\0') { /* Reset string length counters */ len1 = 0; len2 = 0; /* Skip all sequences of zeroes */ while (*c1 == '0') { len1++; c1++; } while (*c2 == '0') { len2++; c2++; } /* If both chars are numeric, do a numeric comparison */ if (IS_NUM(*c1) && IS_NUM(*c2)) { precompare = 0; /* Scan numbers and get preliminary results */ while (IS_NUM(*c1) && IS_NUM(*c2)) { if (*c1 < *c2) precompare = -sort_direction; if (*c1 > *c2) precompare = sort_direction; len1++; len2++; c1++; c2++; /* Skip remaining digit pairs after any * difference is found */ if (precompare != 0) { while (IS_NUM(*c1) && IS_NUM(*c2)) { len1++; len2++; c1++; c2++; } break; } } /* One numeric and one non-numeric means the * numeric one is larger and sorts later */ if (IS_NUM(*c1) ^ IS_NUM(*c2)) { if (IS_NUM(*c1)) return sort_direction; else return -sort_direction; } /* If the last test fell through, numbers are * of equal length. Use the precompare result * as the result for this number comparison. */ if (precompare != 0) return precompare; } /* Do normal comparison */ if (*c1 == *c2 && *c1 != '\0' && *c2 != '\0') { c1++; c2++; len1++; len2++; /* Put symbols and spaces after everything else */ } else if (*c2 < '.' && *c1 >= '.') return -sort_direction; else if (*c1 < '.' 
&& *c2 >= '.') return sort_direction; /* Normal strcmp() style compare */ else if (*c1 > *c2) return sort_direction; else return -sort_direction; } /* Longer strings generally sort later */ if (len1 < len2) return -sort_direction; if (len1 > len2) return sort_direction; /* Normal strcmp() style comparison */ if (*c1 == '\0' && *c2 != '\0') return -sort_direction; if (*c1 != '\0' && *c2 == '\0') return sort_direction; /* Fall through: the strings are equal */ return 0; } jdupes-1.18.1/jody_sort.h000066400000000000000000000005171370142704600152700ustar00rootroot00000000000000/* Jody Bruchon's sorting code library * See jody_sort.c for license information */ #ifndef JODY_SORT_H #define JODY_SORT_H #ifdef __cplusplus extern "C" { #endif extern int numeric_sort(const char * restrict c1, const char * restrict c2, int sort_direction); #ifdef __cplusplus } #endif #endif /* JODY_SORT_H */ jdupes-1.18.1/jody_strtoepoch.c000066400000000000000000000045261370142704600164720ustar00rootroot00000000000000/* Jody Bruchon's datetime-to-epoch conversion function * * Copyright (C) 2020 by Jody Bruchon * Released under The MIT License */ #include #include #include #define REQ_NUM(a) { if (a < '0' || a > '9') return -1; } #define ATONUM(a,b) (a = b - '0') /* Fast multiplies by 100 (*64 + *32 + *4) and 10 (*8 + *2) */ #define MUL100(a) ((a << 6) + (a << 5) + (a << 2)) #define MUL10(a) ((a << 3) + a + a) /* Accepts date[time] strings "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS" * and returns the number of seconds since the Unix Epoch a la mktime() * or returns -1 on any error */ time_t strtoepoch(const char * const datetime) { time_t secs = 0; /* 1970-01-01 00:00:00 */ const char * restrict p = datetime; int i; struct tm tm; if (datetime == NULL || *datetime == '\0') return -1; memset(&tm, 0, sizeof(struct tm)); /* This code replaces "*10" with shift<<3 + add + add */ /* Process year */ tm.tm_year = 1000; REQ_NUM(*p); if (*p == '2') tm.tm_year = 2000; p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_year += MUL100(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_year += MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_year += i; p++; tm.tm_year -= 1900; /* struct tm year is since 1900 */ if (*p != '-') return -1; p++; /* Process month (0-11, not 1-12) */ REQ_NUM(*p); ATONUM(i, *p); tm.tm_mon = MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_mon += (i - 1); p++; if (*p != '-') return -1; p++; /* Process day */ REQ_NUM(*p); ATONUM(i, *p); tm.tm_mday = MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_mday += i; p++; /* If YYYY-MM-DD is specified only, skip the time part */ if (*p == '\0') goto skip_time; if (*p != ' ') return -1; else p++; /* Process hours */ REQ_NUM(*p); ATONUM(i, *p); tm.tm_hour = MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_hour += i; p++; if (*p != ':') return -1; p++; /* Process minutes */ REQ_NUM(*p); ATONUM(i, *p); tm.tm_min = MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_min += i; p++; if (*p != ':') return -1; p++; /* Process seconds */ REQ_NUM(*p); ATONUM(i, *p); tm.tm_sec = MUL10(i); p++; REQ_NUM(*p); ATONUM(i, *p); tm.tm_sec += i; p++; /* Junk after datetime string should cause an error */ if (*p != '\0') return -1; skip_time: tm.tm_isdst = -1; /* Let the host library decide if DST is in effect */ secs = mktime(&tm); return secs; } jdupes-1.18.1/jody_strtoepoch.h000066400000000000000000000005511370142704600164710ustar00rootroot00000000000000/* Jody Bruchon's datetime-to-epoch conversion function * * Copyright (C) 2020 by Jody Bruchon * Released under The MIT License */ #ifndef JODY_STRTOEPOCH_H #define 
JODY_STRTOEPOCH_H #ifdef __cplusplus extern "C" { #endif time_t strtoepoch(const char * const datetime); #ifdef __cplusplus } #endif #endif /* JODY_STRTOEPOCH_H */ jdupes-1.18.1/jody_win_unicode.c000066400000000000000000000040541370142704600165770ustar00rootroot00000000000000/* Jody Bruchon's Windows Unicode helper routines * * Copyright (C) 2014-2020 by Jody Bruchon * Released under The MIT License */ #include "jody_win_unicode.h" #include "jdupes.h" #include #include #include #ifdef UNICODE static wpath_t wstr; /* Convert slashes to backslashes in a file path */ extern void slash_convert(char *path) { while (*path != '\0') { if (*path == '/') *path = '\\'; path++; } return; } /* Copy Windows wide character arguments to UTF-8 */ extern void widearg_to_argv(int argc, wchar_t **wargv, char **argv) { static char temp[PATHBUF_SIZE * 2]; int len; if (!argv) goto error_bad_argv; for (int counter = 0; counter < argc; counter++) { len = W2M(wargv[counter], &temp); if (len < 1) goto error_wc2mb; argv[counter] = (char *)string_malloc((size_t)len + 1); if (!argv[counter]) oom("widearg_to_argv()"); strncpy(argv[counter], temp, (size_t)len + 1); } return; error_bad_argv: fprintf(stderr, "fatal: bad argv pointer\n"); exit(EXIT_FAILURE); error_wc2mb: fprintf(stderr, "fatal: WideCharToMultiByte failed\n"); exit(EXIT_FAILURE); } #else #define slash_convert(a) #endif /* UNICODE */ /* Print a string that is wide on Windows but normal on POSIX */ extern int fwprint(FILE * const restrict stream, const char * const restrict str, const int cr) { #ifdef UNICODE int retval; int stream_mode = out_mode; if (stream == stderr) stream_mode = err_mode; if (stream_mode == _O_U16TEXT) { /* Convert to wide string and send to wide console output */ if (!M2W(str, wstr)) return -1; fflush(stream); _setmode(_fileno(stream), stream_mode); if (cr == 2) retval = fwprintf(stream, L"%S%C", wstr, 0); else retval = fwprintf(stream, L"%S%S", wstr, cr == 1 ? L"\n" : L""); fflush(stream); _setmode(_fileno(stream), _O_TEXT); return retval; } else { #endif if (cr == 2) return fprintf(stream, "%s%c", str, 0); else return fprintf(stream, "%s%s", str, cr == 1 ? "\n" : ""); #ifdef UNICODE } #endif } jdupes-1.18.1/jody_win_unicode.h000066400000000000000000000010771370142704600166060ustar00rootroot00000000000000/* Jody Bruchon's Windows Unicode helper routines * See jody_win_unicode.c for license information */ #ifndef JODY_WIN_UNICODE_H #define JODY_WIN_UNICODE_H #ifdef __cplusplus extern "C" { #endif #include "jdupes.h" #include extern int fwprint(FILE * const restrict stream, const char * const restrict str, const int cr); #ifdef UNICODE extern void slash_convert(char *path); extern void widearg_to_argv(int argc, wchar_t **wargv, char **argv); #else #define slash_convert(a) #endif /* UNICODE */ #ifdef __cplusplus } #endif #endif /* JODY_WIN_UNICODE_H */ jdupes-1.18.1/linux-dedupe-static.h000066400000000000000000000034121370142704600171410ustar00rootroot00000000000000/* * Copyright (C) 2007 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License v2 as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. */ #ifndef JDUPES_BTRFS_H #define JDUPES_BTRFS_H #include #include #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_DEVICE_PATH_NAME_MAX 1024 #define FILE_DEDUPE_RANGE_SAME 0 #define FILE_DEDUPE_RANGE_DIFFERS 1 /* For extent-same ioctl */ struct file_dedupe_range_info { __s64 dest_fd; /* in - destination file */ __u64 dest_offset; /* in - start of extent in destination */ __u64 bytes_deduped; /* out - total # of bytes we were able * to dedupe from this file */ /* status of this dedupe operation: * 0 if dedup succeeds * < 0 for error * == FILE_DEDUPE_RANGE_DIFFERS if data differs */ __s32 status; /* out - see above description */ __u32 reserved; }; struct file_dedupe_range { __u64 src_offset; /* in - start of extent in source */ __u64 src_length; /* in - length of extent */ __u16 dest_count; /* in - total elements in info array */ __u16 reserved1; __u32 reserved2; struct file_dedupe_range_info info[0]; }; #define FIDEDUPERANGE _IOWR(BTRFS_IOCTL_MAGIC, 54, \ struct file_dedupe_range) #endif /* JDUPES_BTRFS_H */ jdupes-1.18.1/string_malloc.c000066400000000000000000000170021370142704600161010ustar00rootroot00000000000000/* * String table allocator * A replacement for malloc() for tables of fixed strings * * Copyright (C) 2015-2020 by Jody Bruchon * Released under The MIT License */ #include #include #include "string_malloc.h" /* Size of pages to allocate at once. Must be divisible by uintptr_t. * The maximum object size is this page size minus about 16 bytes! */ #ifndef SMA_PAGE_SIZE #define SMA_PAGE_SIZE 262144 #endif /* Max freed pointers to remember. Increasing this number allows storing * more free objects but can slow down allocations. Don't increase it if * the program's total reused freed alloc counter doesn't increase as a * result or you're slowing allocs down to no benefit. 
*/ #ifndef SMA_MAX_FREE #define SMA_MAX_FREE 32 #endif #ifdef DEBUG uintmax_t sma_allocs = 0; uintmax_t sma_free_ignored = 0; uintmax_t sma_free_good = 0; uintmax_t sma_free_merged = 0; uintmax_t sma_free_replaced = 0; uintmax_t sma_free_reclaimed = 0; uintmax_t sma_free_scanned = 0; uintmax_t sma_free_tails = 0; #define DBG(a) a #else #define DBG(a) #endif /* This is used to bypass string_malloc for debugging */ #ifdef SMA_PASSTHROUGH void *string_malloc(size_t len) { return malloc(len); } void string_free(void *ptr) { free(ptr); return; } void string_malloc_destroy(void) { return; } #else /* Not SMA_PASSTHROUGH mode */ struct freelist { void *addr; size_t size; }; static void *sma_head = NULL; static uintptr_t *sma_curpage = NULL; static unsigned int sma_pages = 0; static struct freelist sma_freelist[SMA_MAX_FREE]; static int sma_freelist_cnt = 0; static size_t sma_nextfree = sizeof(uintptr_t); /* Scan the freed chunk list for a suitably sized object */ static inline void *scan_freelist(const size_t size) { size_t *object, *min_p; size_t sz, min = 0; int i, used = 0, min_i = -1; /* Don't bother scanning if the list is empty */ if (sma_freelist_cnt == 0) return NULL; for (i = 0; i < SMA_MAX_FREE; i++) { /* Stop scanning once we run out of valid entries */ if (used == sma_freelist_cnt) return NULL; DBG(sma_free_scanned++;) object = sma_freelist[i].addr; /* Skip empty entries */ if (object == NULL) continue; sz = sma_freelist[i].size; used++; /* Skip smaller objects */ if (sz < size) continue; /* Object is big enough; record if it's the new minimum */ if (min == 0 || sz <= min) { min = sz; min_i = i; /* Always stop scanning if exact sized object found */ if (sz == size) break; } } /* Enhancement TODO: split the free item if it's big enough */ /* Return smallest object found and delete from free list */ if (min_i != -1) { min_p = sma_freelist[min_i].addr; sma_freelist[min_i].addr = NULL; sma_freelist_cnt--; min_p++; return (void *)min_p; } /* Fall through - free list search failed */ return NULL; } /* malloc() a new page for string_malloc to use */ static inline void *string_malloc_page(void) { uintptr_t * restrict pageptr; /* Allocate page and set up pointers at page starts */ pageptr = (uintptr_t *)malloc(SMA_PAGE_SIZE); if (pageptr == NULL) return NULL; *pageptr = (uintptr_t)NULL; /* Link previous page to this page, if applicable */ if (sma_curpage != NULL) *sma_curpage = (uintptr_t)pageptr; /* Update last page pointers and total page counter */ sma_curpage = pageptr; sma_pages++; return (void *)pageptr; } void *string_malloc(size_t len) { const void * restrict page = (char *)sma_curpage; static size_t *address; /* Calling with no actual length is invalid */ if (len < 1) return NULL; /* Align objects where possible */ if (len & (sizeof(uintptr_t) - 1)) { len &= ~(sizeof(uintptr_t) - 1); len += sizeof(uintptr_t); } /* Pass-through allocations larger than maximum object size to malloc() */ if (len > (SMA_PAGE_SIZE - sizeof(uintptr_t) - sizeof(size_t))) { /* Allocate the space */ address = (size_t *)malloc(len + sizeof(size_t)); if (!address) return NULL; /* Prefix object with its size */ *address = len; address++; DBG(sma_allocs++;) return (void *)address; } /* Initialize on first use */ if (sma_pages == 0) { /* Initialize the freed object list */ for (int i = 0; i < SMA_MAX_FREE; i++) sma_freelist[i].addr = NULL; /* Allocate first page and set up for first allocation */ sma_head = string_malloc_page(); if (sma_head == NULL) return NULL; sma_nextfree = sizeof(uintptr_t); page = sma_head; } 
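/* Allocation order from this point on: first scan the free list for a previously freed chunk large enough for the request; failing that, carve the object out of the current page at sma_nextfree, and when it will not fit there, donate any usable page tail back to the free list and start a fresh SMA_PAGE_SIZE page. Every object handed out is prefixed with a size_t holding its length (the pointer returned to the caller sits just past that prefix), which is how string_free() recovers the object size from a bare address. */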
/* Allocate objects from the free list first */ address = (size_t *)scan_freelist(len); if (address != NULL) { DBG(sma_free_reclaimed++;) return (void *)address; } /* Allocate new page if this object won't fit */ if ((sma_nextfree + len + sizeof(size_t)) > SMA_PAGE_SIZE) { size_t sz; size_t *tailaddr; /* See if page tail has usable remaining capacity */ sz = sma_nextfree + sizeof(size_t) + sizeof(uintptr_t); /* Try to add page tail to free list rather than waste it */ if (sz <= SMA_PAGE_SIZE) { sz = SMA_PAGE_SIZE - sma_nextfree - sizeof(size_t); tailaddr = (size_t *)((uintptr_t)page + sma_nextfree); *tailaddr = (size_t)sz; tailaddr++; string_free(tailaddr); DBG(sma_free_tails++;) } page = string_malloc_page(); if (!page) return NULL; sma_nextfree = sizeof(uintptr_t); } /* Allocate the space */ address = (size_t *)((uintptr_t)page + sma_nextfree); /* Prefix object with its size */ *address = len; address++; sma_nextfree += len + sizeof(size_t); DBG(sma_allocs++;) return (void *)address; } /* Free an object, adding to free list if possible */ void string_free(void * const addr) { int freefull = 0; struct freelist *emptyslot = NULL; static uintptr_t before, after; static size_t *sizeptr; static size_t size; /* Do nothing on NULL address */ if (addr == NULL) goto sf_failed; /* Get address to real start of object and the object size */ sizeptr = (size_t *)addr - 1; size = *(size_t *)sizeptr; /* Calculate after-block pointer for merge checks */ after = (uintptr_t)addr + size; /* If free list is full, try to replace a smaller object */ if (sma_freelist_cnt == SMA_MAX_FREE) freefull = 1; /* Attempt to merge into other free objects */ for (int i = 0; i < SMA_MAX_FREE; i++) { /* Record first empty slot */ if (emptyslot == NULL && sma_freelist[i].addr == NULL) { emptyslot = &(sma_freelist[i]); // break; } else if (freefull != 0 && sma_freelist[i].size < size) { /* Replace object if list is full and new one is bigger */ emptyslot = &(sma_freelist[i]); DBG(sma_free_replaced++;) break; } else if ((uintptr_t)(sma_freelist[i].addr) == after) { /* Merge with a block after this one */ sma_freelist[i].addr = sizeptr; sma_freelist[i].size += (size + sizeof(size_t *)); DBG(sma_free_good++;) DBG(sma_free_merged++;) return; } else { before = (uintptr_t)addr + size; if (before == (uintptr_t)(sma_freelist[i].addr)) { /* Merge with a block before this one */ sma_freelist[i].size += (size + sizeof(size_t *)); DBG(sma_free_good++;) DBG(sma_free_merged++;) } } } /* Merges failed; add to empty slot (if any found) */ if (emptyslot != NULL) { if (emptyslot->addr == NULL) sma_freelist_cnt++; emptyslot->addr = sizeptr; emptyslot->size = size; DBG(sma_free_good++;) return; } /* Fall through */ sf_failed: DBG(sma_free_ignored++;) return; } /* Destroy all allocated pages */ void string_malloc_destroy(void) { uintptr_t *cur; uintptr_t *next; cur = sma_head; if (sma_head == NULL) return; while (sma_pages > 0) { next = (uintptr_t *)*cur; free(cur); cur = next; sma_pages--; } sma_head = NULL; return; } #endif /* SMA_PASSTHROUGH */ jdupes-1.18.1/string_malloc.h000066400000000000000000000012741370142704600161120ustar00rootroot00000000000000/* String table allocator * A replacement for malloc() for tables of fixed strings * See string_malloc.c for license information */ #ifndef STRING_MALLOC_H #define STRING_MALLOC_H #ifdef __cplusplus extern "C" { #endif #ifdef DEBUG extern uintmax_t sma_allocs; extern uintmax_t sma_free_ignored; extern uintmax_t sma_free_good; extern uintmax_t sma_free_merged; extern uintmax_t sma_free_replaced; 
extern uintmax_t sma_free_scanned; extern uintmax_t sma_free_reclaimed; extern uintmax_t sma_free_tails; #endif extern void *string_malloc(size_t len); extern void string_free(void * const addr); extern void string_malloc_destroy(void); #ifdef __cplusplus } #endif #endif /* STRING_MALLOC_H */ jdupes-1.18.1/stupid_dupes.sh000077500000000000000000000250541370142704600161550ustar00rootroot00000000000000#!/bin/bash # stupid_dupes: find duplicates like jdupes but more slowly with a shell script # Copyright (C) 2020 by Jody Bruchon # # The MIT License (MIT) # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in # the Software without restriction, including without limitation the rights to # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of # the Software, and to permit persons to whom the Software is furnished to do so, # subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. PROGNAME=stupid_dupes.sh VER=1.0 VERDATE=2020-02-18 V=1 # Verbosity AC=0 # Argument count PHS=4096 # Partial hash size FQUICK=0 # Quick (no final compare) mode FICNT=0 # File index counter MSCNT=0 # Match set counter STATUS=0 # Exit status # A hash command that outputs a plain file hash (no file names) test -z "$HASHCMD" && HASHCMD=jodyhash # 'find' defaults to no-recurse FRECURSE="-maxdepth 1" # sort option (cat = none) test -z "$SORTCMD" && SORTCMD="cat" ### Function definitions # $1: file path to add add_file () { test $V -gt 1 && echo "add_file: '$1'" >&2 SZ="$(stat -c '%s' "$1" || echo FAIL)" if [ "$SZ" = "FAIL" ] then echo "error: add_file: can't stat '$1'" >&2 STATUS=1 return fi FICNT=$((FICNT + 1)) FILES[FICNT]="$1" SIZES[FICNT]="$SZ" PHASH[FICNT]="NULL" FHASH[FICNT]="NULL" test $V -gt 1 && echo "add_file: added as file number $FICNT" >&2 } # $1: hash to get (partial/full); $2: file # to hash get_filehash () { test $V -gt 1 && echo "get_filehash: $1:$2 '${FILES[$2]}'" >&2 test -z "${FILES[$2]}" && \ echo "internal error: get_filehash: bad file number passed" >&2 && exit 1 case "$1" in partial) PHASH[$2]="$(dd if="${FILES[$2]}" bs=4096 count=1 2>/dev/null | $HASHCMD || echo "FAIL")" test "${PHASH[$2]}" = "FAIL" && \ echo "get_filehash: hashing failed: '${FILES[$2]}'" >&2 && STATUS=1 ;; full) FHASH[$2]="$($HASHCMD "${FILES[$2]}" || echo "FAIL")" test "${FHASH[$2]}" = "FAIL" && \ echo "get_filehash: hashing failed: '${FILES[$2]}'" >&2 && STATUS=1 ;; *) echo "internal error: get_filehash: invalid hash type '$1'" >&2 exit 1; ;; esac test $V -gt 1 && echo "get_filehash: PHASH=${PHASH[$2]}" >&2 return 0 } # $1/$2: file numbers to check for a match check_match () { test $V -gt 1 && echo "check_match: checking: $1:'${FILES[$1]}', $2:'${FILES[$2]}'" >&2 # Sizes must match if [ ${SIZES[$1]} != ${SIZES[$2]} ] then test $V -gt 1 && \ echo "check_match: sizes differ: ${SIZES[$1]} != ${SIZES[$2]}" >&2 return 1 fi # 
Check partial hashes test "${PHASH[$1]}" = "NULL" && get_filehash partial "$1" test "${PHASH[$1]}" = "FAIL" && STATUS=1 && return 1 test "${PHASH[$2]}" = "NULL" && get_filehash partial "$2" test "${PHASH[$2]}" = "FAIL" && STATUS=1 && return 1 if [ "${PHASH[$1]}" != "${PHASH[$2]}" ] then test $V -gt 1 && echo "check_match: partial hashes don't match" >&2 return 1 else test $V -gt 1 && echo "check_match: partial hashes match" >&2 fi # Check full hashes test "${FHASH[$1]}" = "NULL" && get_filehash full "$1" test "${FHASH[$1]}" = "FAIL" && STATUS=1 && return 1 test "${FHASH[$2]}" = "NULL" && get_filehash full "$2" test "${FHASH[$2]}" = "FAIL" && STATUS=1 && return 1 if [ "${FHASH[$1]}" != "${FHASH[$2]}" ] then test $V -gt 1 && echo "check_match: full hashes don't match" >&2 return 1 else test $V -gt 1 && echo "check_match: full hashes match" >&2 fi # Byte-for-byte compare the files if [ $FQUICK -eq 1 ] || cmp -s "${FILES[$1]}" "${FILES[$2]}" then test $V -gt 1 && echo "check_match: files are identical" >&2 return 0 else test $V -gt 1 && echo "check_match: files are not identical" >&2 return 1 fi return 1 # should never be reached } add_to_matches () { test $V -gt 1 && echo "add_to_matches: adding: '${FILES[$1]}','${FILES[$2]}'" >&2 MSCNT=$((MSCNT + 1)) MLEFT[$MSCNT]=$1 MRIGHT[$MSCNT]=$2 MPROC[$MSCNT]=0 # Flips to 1 during final processing test $V -gt 1 && echo "add_to_matches: set $MSCNT = $1:$2" >&2 return 0 } print_matches () { test $V -gt 1 && echo "print_matches: running" >&2 FIRST=1 PRINTCNT=1 CURFILE=0 # Outer loop: find a match pair to start with while [ $PRINTCNT -le $MSCNT ] do test $V -gt 1 && echo " outer loop: print count $PRINTCNT, match count $MSCNT" >&2 # Don't reprint already-printed match pairings if [ ${MPROC[PRINTCNT]} -ne 0 ] then test $V -gt 1 && echo " skipping processed pair $PRINTCNT" >&2 PRINTCNT=$((PRINTCNT + 1)) continue fi CURFILE=${MLEFT[PRINTCNT]} # Print a newline before each new set EXCEPT the first set if [ $FIRST -eq 1 ]; then FIRST=0; else echo; fi echo "${FILES[CURFILE]}" # Inner loop: find match pairs to print CURCNT=$PRINTCNT; PREVCNT=1; unset PREV; PREV[1]=$CURFILE while [ $CURCNT -le $MSCNT ] do test $V -gt 1 && echo " inner loop: CC $CURCNT" >&2 test $V -gt 1 && echo " files: ${MLEFT[CURCNT]}:'${FILES[${MLEFT[CURCNT]}]}', ${MRIGHT[CURCNT]}:'${FILES[${MRIGHT[CURCNT]}]}'" >&2 if [ ${MPROC[CURCNT]} -ne 0 ] then test $V -gt 1 && echo " skipping processed pair $CURCNT" >&2 CURCNT=$((CURCNT + 1)) continue fi CURMATCH_L=0; CURMATCH_R=0; PCCNT=0 # For each pair, check both sides for any known match number while [ $PCCNT -lt $PREVCNT ] do PCCNT=$((PCCNT + 1)) test $V -gt 1 && echo -n " deep loop: $PCCNT <= $PREVCNT" >&2 test ${MLEFT[CURCNT]} -eq ${PREV[$PCCNT]} && CURMATCH_L=${MRIGHT[CURCNT]} test ${MRIGHT[CURCNT]} -eq ${PREV[$PCCNT]} && CURMATCH_R=${MLEFT[CURCNT]} test $V -gt 1 && echo ", curmatch: $CURMATCH = ${MLEFT[CURCNT]} < ${PREV[PCCNT]} > ${MRIGHT[CURCNT]}" >&2 # If both sides of this pair have been previously seen, # just flag the pair and print nothing.
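# Worked example: with match pairs (1,2), (1,3) and (2,3), the outer loop prints file 1 and seeds PREV with it, pair (1,2) then prints file 2 and adds it to PREV, pair (1,3) prints file 3, and pair (2,3) finally matches PREV on BOTH sides, so it is only flagged as processed here instead of being printed again or starting a new set.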
if [[ $CURMATCH_L -ne 0 && $CURMATCH_R -ne 0 ]] then MPROC[$CURCNT]=1 test $V -gt 1 && echo " Flagging: pair $CURCNT (${MLEFT[CURCNT]}:${MRIGHT[CURCNT]}) (R)" >&2 break fi done # If L or R match exists, we have a printable match CURMATCH=0 test $CURMATCH_L -ne 0 && test $CURMATCH_R -eq 0 && CURMATCH=$CURMATCH_L test $CURMATCH_R -ne 0 && test $CURMATCH_L -eq 0 && CURMATCH=$CURMATCH_R if [ $CURMATCH -ne 0 ] then echo "${FILES[CURMATCH]}" MPROC[$CURCNT]=1 test $V -gt 1 && echo " Flagging: pair $CURCNT (${MLEFT[CURCNT]}:${MRIGHT[CURCNT]})" >&2 PREVCNT=$((PREVCNT + 1)) PREV[$PREVCNT]=$CURMATCH fi CURCNT=$((CURCNT + 1)) done PRINTCNT=$((PRINTCNT + 1)) done test $V -gt 1 && echo "print_matches: complete" >&2 return 0 } show_help () { COPYTEXT="Copyright (C) 2020 by Jody Bruchon " echo "$PROGNAME $VER ($VERDATE)" if [ "$2" = "full" ] then echo "$COPYTEXT" echo -e "\nUsage: $PROGNAME [options] file_or_dir1 [more_files ...]\n" echo -e "Options:\n" echo "-r|--recurse Recurse into any subdirectories" echo "-q|--quiet Only show final output and errors" echo "-Q|--quick Skip the full file byte-for-byte comparison" echo "-D|--debug Show lots of extra debugging text" echo "-v|-V|--version Display program version and exit" echo "-h|--help Show this help text and exit" echo "--license Show the full program license text" echo -e "\njdupes is better than me. Get it at github.com/jbruchon/jdupes\n" fi if [ "$2" = "license" ] then echo "$COPYTEXT" echo -e "\nThe MIT License (MIT)\n" echo "Permission is hereby granted, free of charge, to any person obtaining a copy of" echo "this software and associated documentation files (the \"Software\"), to deal in" echo "the Software without restriction, including without limitation the rights to" echo "use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of" echo "the Software, and to permit persons to whom the Software is furnished to do so," echo -e "subject to the following conditions:\n" echo "The above copyright notice and this permission notice shall be included in all" echo -e "copies or substantial portions of the Software.\n" echo "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR" echo "IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS" echo "FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR" echo "COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER" echo "IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN" echo "CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." fi exit $1 } ### End function definitions ### Begin main program # Process arguments [[ "$@" = "" ]] && show_help 1 full for X in $@ do case "$X" in -q|--quiet) V=0 ;; -D|--debug) V=2 ;; -r|--recurse) FRECURSE="" ;; -Q|--quick) FQUICK=1 ;; -v|-V|--version) show_help 0 version ;; -h|--help) show_help 0 full ;; --license) show_help 0 license ;; *) AC=$((AC + 1)); ARGS[AC]="$X" ;; esac done test $V -gt 1 && echo -e "Command line: $0 $@" >&2 # Main loop ARGNUM=1 while [ $ARGNUM -le $AC ] do test $V -gt 1 && echo -e "Processing argument $ARGNUM: '${ARGS[ARGNUM]}'" >&2 if [[ ! -f "${ARGS[ARGNUM]}" && ! 
-d "${ARGS[ARGNUM]}" || -h "${ARGS[ARGNUM]}" ]] then echo "warning: not a regular file or directory: '${ARGS[ARGNUM]}'" >&2 STATUS=1 ARGNUM=$((ARGNUM + 1)) continue fi # Add files/dirs to the list, recursing as needed while read X do add_file "$X" done < <(find "${ARGS[ARGNUM]}" $FRECURSE -type f -size +0 | $SORTCMD) ARGNUM=$((ARGNUM + 1)) done # If there are not enough files, just exit with no matches test $FICNT -lt 2 && echo "No matches found." && exit $STATUS # Check every file pair for matches CNT=1 while [ $CNT -lt $FICNT ] do SCAN=$CNT while [ $SCAN -lt $FICNT ] do SCAN=$((SCAN + 1)) check_match $CNT $SCAN && add_to_matches $CNT $SCAN done CNT=$((CNT + 1)) done print_matches exit $STATUS jdupes-1.18.1/testdir/000077500000000000000000000000001370142704600145565ustar00rootroot00000000000000jdupes-1.18.1/testdir/.hidden_dir/000077500000000000000000000000001370142704600167255ustar00rootroot00000000000000jdupes-1.18.1/testdir/.hidden_dir/hiddendir_two000066400000000000000000000000041370142704600214650ustar00rootroot00000000000000two jdupes-1.18.1/testdir/.hidden_two000066400000000000000000000000041370142704600166750ustar00rootroot00000000000000two jdupes-1.18.1/testdir/block_size_tests/000077500000000000000000000000001370142704600201245ustar00rootroot00000000000000jdupes-1.18.1/testdir/block_size_tests/4095b_file1000066400000000000000000000077771370142704600217140ustar00rootroot00000000000000This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
[testdir archive contents: each test file below holds verbatim repetitions of a single filler line, which also makes up the tail of the preceding test file whose header falls outside this excerpt: "This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun!"]

jdupes-1.18.1/testdir/block_size_tests/4095b_file2   4095 bytes of the filler line
jdupes-1.18.1/testdir/block_size_tests/4096b_file1   4096 bytes of the filler line
jdupes-1.18.1/testdir/block_size_tests/4096b_file2   4096 bytes of the filler line
jdupes-1.18.1/testdir/block_size_tests/4097b_file1   4097 bytes of the filler line
jdupes-1.18.1/testdir/block_size_tests/4097b_file2   4097 bytes of the filler line
jdupes-1.18.1/testdir/extensions/fake_doc_001.doc    "fake mp3 file"
jdupes-1.18.1/testdir/extensions/fake_doc_002.doc    "fake mp3 file"
jdupes-1.18.1/testdir/extensions/fake_mp3_001.mp3    "fake mp3 file"
jdupes-1.18.1/testdir/extensions/fake_mp3_002.mp3    "fake mp3 file"
jdupes-1.18.1/testdir/extensions/fake_mp4_001.mp4    "fake mp3 file"
jdupes-1.18.1/testdir/extensions/fake_mp4_002.mp4    "fake mp3 file"
jdupes-1.18.1/testdir/isolate/1/1.txt                "isolate"
jdupes-1.18.1/testdir/isolate/1/2.txt                "isolate"
jdupes-1.18.1/testdir/isolate/2/3.txt                "isolate"
jdupes-1.18.1/testdir/isolate/2/4.txt                "isolate"
jdupes-1.18.1/testdir/isolate/3/5.txt                "isolate"
jdupes-1.18.1/testdir/isolate/3/6.txt                "isolate"
jdupes-1.18.1/testdir/isolate/3/7.txt                "isolate"
jdupes-1.18.1/testdir/isolate/4/8.txt                "isolate"
jdupes-1.18.1/testdir/larger_file_1                  many repetitions of the filler line, well past the quick hash block size
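The three block_size_tests sizes straddle a partial-hash boundary: 4095- and 4096-byte files fit entirely within one quick-hash block, while 4097-byte files force the scanner to read past it. As a rough illustration of that boundary check only (the 4096-byte BLOCK constant and the FNV-1a hash below are assumptions for this sketch, not values or code taken from the jdupes source), a quick-hash pass might look like this:

/* Sketch of a quick-hash pass over at most one block of a file.
 * BLOCK and the FNV-1a hash are illustrative assumptions, not
 * the block size or hash function used by jdupes itself. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK 4096  /* assumed quick hash block size */

static uint64_t fnv1a64(const unsigned char *buf, size_t len)
{
    uint64_t h = 14695981039346656037ULL;  /* FNV-1a offset basis */
    for (size_t i = 0; i < len; i++) {
        h ^= buf[i];
        h *= 1099511628211ULL;  /* FNV-1a prime */
    }
    return h;
}

/* Hash the first BLOCK bytes of path; set *larger if the file
 * continues past the block (the case the 4097b test files hit). */
static int quick_hash(const char *path, uint64_t *hash, int *larger)
{
    unsigned char buf[BLOCK];
    FILE *f = fopen(path, "rb");

    if (f == NULL) return -1;
    *hash = fnv1a64(buf, fread(buf, 1, BLOCK, f));
    *larger = (fgetc(f) != EOF);
    fclose(f);
    return 0;
}

Two files whose quick hashes differ can be rejected as duplicates immediately; only when the quick hashes match and the file extends past the block does a full-file hash or byte-for-byte comparison become necessary, which is the code path the 4097-byte pair exercises and the 4095/4096-byte pairs do not.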
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. 
Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
jdupes-1.18.1/testdir/larger_file_2000066400000000000000000002023601370142704600172000ustar00rootroot00000000000000
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun!
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. 
Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! jdupes-1.18.1/testdir/larger_file_3000066400000000000000000002023601370142704600172010ustar00rootroot00000000000000Unlike the other large files, this one is intended to fail matching early. This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. 
Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun!
jdupes-1.18.1/testdir/larger_file_4000066400000000000000000002023601370142704600172020ustar00rootroot00000000000000This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun!
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. 
Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. 
Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. 
Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! 
This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! 
:-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. 
If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner program! :-P If you'll excuse me, I have to copy-paste like crazy now. Have fun! This file is a larger file than the other testdir files. Its purpose is to trigger code that works with files larger than the quick hash block size. Since I did not feel like typing out thousands of lines of text, this long line will be duplicated ad infinitum. If you don't like that, write your own duplicate scanner prog Unlike the other large files, this one is designed to fail matching later. jdupes-1.18.1/testdir/nine_upsidedown000066400000000000000000000000041370142704600176650ustar00rootroot00000000000000six jdupes-1.18.1/testdir/notsotinydupe1000066400000000000000000000001021370142704600174770ustar00rootroot00000000000000This is not quite such a small duplicate as the other duplicates. jdupes-1.18.1/testdir/notsotinydupe2000066400000000000000000000001021370142704600175000ustar00rootroot00000000000000This is not quite such a small duplicate as the other duplicates. 
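The repeated filler above is deliberate: jdupes hashes only the first block of each file as a quick pre-filter, so these large testdir files all match in that first block, and the one that is "designed to fail matching later" can only be rejected by a later full-file pass. A minimal sketch of that two-stage idea follows; PARTIAL_HASH_SIZE, hash_block() and quick_hash() are hypothetical names used only for illustration, and the trivial FNV-1a stand-in is not jdupes' actual xxHash-based code.

#include <stdio.h>
#include <stdint.h>

#define PARTIAL_HASH_SIZE 4096  /* assumed quick hash block size, illustration only */

/* Trivial FNV-1a stand-in hash; jdupes itself uses xxHash (see xxhash.c) */
static uint64_t hash_block(const unsigned char *buf, size_t len)
{
  uint64_t h = 14695981039346656037ULL;  /* FNV-1a offset basis */
  for (size_t i = 0; i < len; i++) h = (h ^ buf[i]) * 1099511628211ULL;
  return h;
}

/* Quick hash: read and hash only the first PARTIAL_HASH_SIZE bytes */
static int quick_hash(const char *path, uint64_t *out)
{
  unsigned char buf[PARTIAL_HASH_SIZE];
  size_t len;
  FILE *fp = fopen(path, "rb");
  if (fp == NULL) return -1;
  len = fread(buf, 1, sizeof(buf), fp);
  fclose(fp);
  *out = hash_block(buf, len);
  return 0;
}

int main(int argc, char **argv)
{
  uint64_t h1, h2;
  if (argc != 3) return 1;
  if (quick_hash(argv[1], &h1) != 0 || quick_hash(argv[2], &h2) != 0) return 1;
  /* Equal quick hashes mark only candidate duplicates; a full-file hash or
   * byte-for-byte comparison must confirm the match, which is exactly what
   * the "fail matching later" test file is built to exercise. */
  printf("quick hashes %s\n", (h1 == h2) ? "match (candidate duplicates)" : "differ");
  return 0;
}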
jdupes-1.18.1/testdir/numeric_sort/000077500000000000000000000000001370142704600172675ustar00rootroot00000000000000jdupes-1.18.1/testdir/numeric_sort/file001000066400000000000000000000000041370142704600203440ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file001a000066400000000000000000000000041370142704600205050ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file002000066400000000000000000000000041370142704600203450ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file020000066400000000000000000000000041370142704600203450ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file021000066400000000000000000000000041370142704600203460ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file030000066400000000000000000000000041370142704600203460ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file1000066400000000000000000000000041370142704600202040ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file10000066400000000000000000000000041370142704600202640ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file100000066400000000000000000000000041370142704600203440ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file10a000066400000000000000000000000041370142704600204250ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file1a2000066400000000000000000000000041370142704600204270ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file2000066400000000000000000000000041370142704600202050ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort/file3000066400000000000000000000000041370142704600202060ustar00rootroot00000000000000foo jdupes-1.18.1/testdir/numeric_sort_2/000077500000000000000000000000001370142704600175105ustar00rootroot00000000000000jdupes-1.18.1/testdir/numeric_sort_2/file1-0 (1).jpg000066400000000000000000000000041370142704600216030ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/numeric_sort_2/file1-0#1.jpg000066400000000000000000000000041370142704600214650ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/numeric_sort_2/file1-0.jpg000066400000000000000000000000041370142704600213410ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/numeric_sort_2/file1-1.jpg000066400000000000000000000000041370142704600213420ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/numeric_sort_2/file1-10.jpg000066400000000000000000000000041370142704600214220ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/numeric_sort_2/file1-2.jpg000066400000000000000000000000041370142704600213430ustar00rootroot00000000000000bar jdupes-1.18.1/testdir/recursed_a/000077500000000000000000000000001370142704600166725ustar00rootroot00000000000000jdupes-1.18.1/testdir/recursed_a/five000066400000000000000000000000051370142704600175410ustar00rootroot00000000000000five jdupes-1.18.1/testdir/recursed_a/five_2000066400000000000000000000000051370142704600177620ustar00rootroot00000000000000five jdupes-1.18.1/testdir/recursed_a/one000066400000000000000000000000041370142704600173700ustar00rootroot00000000000000one jdupes-1.18.1/testdir/recursed_a/one_2000066400000000000000000000000041370142704600176110ustar00rootroot00000000000000one jdupes-1.18.1/testdir/recursed_a/symlink_infinite_loop000077700000000000000000000000001370142704600254662../recursed_austar00rootroot00000000000000jdupes-1.18.1/testdir/recursed_a/two000066400000000000000000000000041370142704600174200ustar00rootroot00000000000000two 
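The numeric_sort and numeric_sort_2 entries above (file1, file2, file10, file001, ...) are evidently inputs for a name ordering that compares digit runs by numeric value, so that file2 sorts before file10. A minimal sketch of such a comparison, assuming nothing about jdupes' actual sort code; numeric_name_cmp() is a hypothetical name, and leading-zero names such as file001 vs. file1 tie in this simplified version.

#include <ctype.h>
#include <stdio.h>

/* Compare names so digit runs order by value: "file2" < "file10".
 * Illustration only; leading zeros ("file001" vs "file1") compare equal here. */
static int numeric_name_cmp(const char *a, const char *b)
{
  while (*a != '\0' && *b != '\0') {
    if (isdigit((unsigned char)*a) && isdigit((unsigned char)*b)) {
      unsigned long long na = 0, nb = 0;
      while (isdigit((unsigned char)*a)) na = na * 10 + (unsigned long long)(*a++ - '0');
      while (isdigit((unsigned char)*b)) nb = nb * 10 + (unsigned long long)(*b++ - '0');
      if (na != nb) return (na < nb) ? -1 : 1;
    } else {
      if (*a != *b) return (*a < *b) ? -1 : 1;
      a++; b++;
    }
  }
  if (*a == *b) return 0;       /* both exhausted: names are equal */
  return (*a == '\0') ? -1 : 1; /* shorter name sorts first */
}

int main(void)
{
  printf("%d\n", numeric_name_cmp("file2", "file10"));   /* -1: file2 first */
  printf("%d\n", numeric_name_cmp("file10a", "file10")); /*  1: longer name later */
  return 0;
}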
jdupes-1.18.1/testdir/recursed_a/two_2000066400000000000000000000000041370142704600176410ustar00rootroot00000000000000two jdupes-1.18.1/testdir/recursed_b/000077500000000000000000000000001370142704600166735ustar00rootroot00000000000000jdupes-1.18.1/testdir/recursed_b/four000066400000000000000000000000051370142704600175640ustar00rootroot00000000000000four jdupes-1.18.1/testdir/recursed_b/one000066400000000000000000000000041370142704600173710ustar00rootroot00000000000000one jdupes-1.18.1/testdir/recursed_b/three000066400000000000000000000000061370142704600177210ustar00rootroot00000000000000three jdupes-1.18.1/testdir/recursed_b/two_plus_one000066400000000000000000000000061370142704600213270ustar00rootroot00000000000000three jdupes-1.18.1/testdir/recursed_c/000077500000000000000000000000001370142704600166745ustar00rootroot00000000000000jdupes-1.18.1/testdir/recursed_c/five000066400000000000000000000000051370142704600175430ustar00rootroot00000000000000five jdupes-1.18.1/testdir/recursed_c/level2/000077500000000000000000000000001370142704600200655ustar00rootroot00000000000000jdupes-1.18.1/testdir/recursed_c/level2/five000066400000000000000000000000051370142704600207340ustar00rootroot00000000000000five jdupes-1.18.1/testdir/recursed_c/level2/one000066400000000000000000000000041370142704600205630ustar00rootroot00000000000000one jdupes-1.18.1/testdir/recursed_c/level2/two000066400000000000000000000000041370142704600206130ustar00rootroot00000000000000two jdupes-1.18.1/testdir/recursed_c/one000066400000000000000000000000041370142704600173720ustar00rootroot00000000000000one jdupes-1.18.1/testdir/recursed_c/two000066400000000000000000000000041370142704600174220ustar00rootroot00000000000000two jdupes-1.18.1/testdir/symlink_dir000077700000000000000000000000001370142704600210572recursed_austar00rootroot00000000000000jdupes-1.18.1/testdir/symlink_test/000077500000000000000000000000001370142704600173035ustar00rootroot00000000000000jdupes-1.18.1/testdir/symlink_test/regular_file000066400000000000000000000000221370142704600216600ustar00rootroot00000000000000symlink test file jdupes-1.18.1/testdir/symlink_test/symlinked_file000077700000000000000000000000001370142704600246022regular_fileustar00rootroot00000000000000jdupes-1.18.1/testdir/symlink_twice_one000077700000000000000000000000001370142704600207522twoustar00rootroot00000000000000jdupes-1.18.1/testdir/symlink_two000077700000000000000000000000001370142704600176072twoustar00rootroot00000000000000jdupes-1.18.1/testdir/tinydupe1000066400000000000000000000000011370142704600164120ustar00rootroot00000000000000 jdupes-1.18.1/testdir/tinydupe2000066400000000000000000000000011370142704600164130ustar00rootroot00000000000000 jdupes-1.18.1/testdir/twice_one000066400000000000000000000000041370142704600164470ustar00rootroot00000000000000two jdupes-1.18.1/testdir/two000066400000000000000000000000041370142704600153040ustar00rootroot00000000000000two jdupes-1.18.1/testdir/unicode_dirnames/000077500000000000000000000000001370142704600200665ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/Ελληνιά/000077500000000000000000000000001370142704600253535ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/Ελληνιά/Unicode testfile.txt000066400000000000000000000000301370142704600312730ustar00rootroot00000000000000до свиданияjdupes-1.18.1/testdir/unicode_dirnames/до свидания/000077500000000000000000000000001370142704600275545ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/до свидания/Unicode 
testfile.txt000066400000000000000000000000301370142704600334740ustar00rootroot00000000000000до свиданияjdupes-1.18.1/testdir/unicode_dirnames/दसविदानिया/000077500000000000000000000000001370142704600332275ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/दसविदानिया/Unicode testfile.txt000066400000000000000000000000301370142704600371470ustar00rootroot00000000000000до свиданияjdupes-1.18.1/testdir/unicode_dirnames/怖い/000077500000000000000000000000001370142704600221115ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/怖い/Unicode testfile.txt000066400000000000000000000000301370142704600260310ustar00rootroot00000000000000до свиданияjdupes-1.18.1/testdir/unicode_dirnames/행운을 빈다/000077500000000000000000000000001370142704600254365ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_dirnames/행운을 빈다/Unicode testfile.txt000066400000000000000000000000301370142704600313560ustar00rootroot00000000000000до свиданияjdupes-1.18.1/testdir/unicode_filenames/000077500000000000000000000000001370142704600202275ustar00rootroot00000000000000jdupes-1.18.1/testdir/unicode_filenames/Ελληνιά000066400000000000000000000001061370142704600254340ustar00rootroot00000000000000oh hi, this file has a Japanese name for testing this program against!jdupes-1.18.1/testdir/unicode_filenames/до свидания000066400000000000000000000001061370142704600276350ustar00rootroot00000000000000oh hi, this file has a Japanese name for testing this program against!jdupes-1.18.1/testdir/unicode_filenames/दसविदानिया000066400000000000000000000001061370142704600333100ustar00rootroot00000000000000oh hi, this file has a Japanese name for testing this program against!jdupes-1.18.1/testdir/unicode_filenames/怖い000066400000000000000000000001061370142704600221720ustar00rootroot00000000000000oh hi, this file has a Japanese name for testing this program against!jdupes-1.18.1/testdir/unicode_filenames/행운을 빈다000066400000000000000000000001061370142704600255170ustar00rootroot00000000000000oh hi, this file has a Japanese name for testing this program against!jdupes-1.18.1/testdir/with spaces a000066400000000000000000000000141370142704600171070ustar00rootroot00000000000000with spaces jdupes-1.18.1/testdir/with spaces b000066400000000000000000000000141370142704600171100ustar00rootroot00000000000000with spaces jdupes-1.18.1/testdir/zero_a000066400000000000000000000000001370142704600157460ustar00rootroot00000000000000jdupes-1.18.1/testdir/zero_b000066400000000000000000000000001370142704600157470ustar00rootroot00000000000000jdupes-1.18.1/tune_winres.sh000077500000000000000000000011141370142704600157760ustar00rootroot00000000000000#!/bin/sh WINRES="winres.rc" # Get version number components VER="$(grep '^#define VER "' version.h | cut -d\" -f2)" V1="$(echo "$VER" | cut -d. -f1)"; test -z "$V1" && V1=0 V2="$(echo "$VER" | cut -d. -f2)"; test -z "$V2" && V2=0 V3="$(echo "$VER" | cut -d. -f3)"; test -z "$V3" && V3=0 V4="$(echo "$VER" | cut -d. 
-f4)"; test -z "$V4" && V4=0 PRODVER="$V1,$V2,$V3,$V4" echo "$VER = $PRODVER" # Actually change the manifest version information sed -i 's/\([A-Z]*\)VERSION [0-9],.*/\1VERSION '"$PRODVER/"';s/"\([A-Za-z]*\)Version", "[0-9],.*"/"\1Version", '"\"$PRODVER\"/" "$WINRES" jdupes-1.18.1/version.h000066400000000000000000000004001370142704600147300ustar00rootroot00000000000000/* VERSION determines the program's version number * This file is part of jdupes; see jdupes.c for license information */ #ifndef JDUPES_VERSION_H #define JDUPES_VERSION_H #define VER "1.18.1" #define VERDATE "2020-07-08" #endif /* JDUPES_VERSION_H */ jdupes-1.18.1/win_stat.c000066400000000000000000000044701370142704600151010ustar00rootroot00000000000000/* * Windows-native code for getting stat()-like information * * Copyright (C) 2016-2020 by Jody Bruchon * Released under The MIT License */ #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #include #include "win_stat.h" #include #define WPATH_MAX 8192 #define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, WPATH_MAX) /* Convert NT epoch to UNIX epoch */ static time_t nttime_to_unixtime(const uint64_t * const restrict timestamp) { uint64_t newstamp; memcpy(&newstamp, timestamp, sizeof(uint64_t)); newstamp /= 10000000LL; if (newstamp <= 11644473600LL) return 0; newstamp -= 11644473600LL; return (time_t)newstamp; } /* Get stat()-like extra information for a file on Windows */ int win_stat(const char * const filename, struct winstat * const restrict buf) { HANDLE hFile; BY_HANDLE_FILE_INFORMATION bhfi; uint64_t timetemp; #ifdef UNICODE static wchar_t wname2[WPATH_MAX]; if (!buf) return -127; if (!M2W(filename,wname2)) return -126; hFile = CreateFileW(wname2, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); #else if (!buf) return -127; hFile = CreateFile(filename, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); #endif if (hFile == INVALID_HANDLE_VALUE) goto failure; if (!GetFileInformationByHandle(hFile, &bhfi)) goto failure2; buf->st_ino = ((uint64_t)(bhfi.nFileIndexHigh) << 32) + (uint64_t)bhfi.nFileIndexLow; buf->st_size = ((int64_t)(bhfi.nFileSizeHigh) << 32) + (int64_t)bhfi.nFileSizeLow; timetemp = ((uint64_t)(bhfi.ftCreationTime.dwHighDateTime) << 32) + bhfi.ftCreationTime.dwLowDateTime; buf->st_ctime = nttime_to_unixtime(&timetemp); timetemp = ((uint64_t)(bhfi.ftLastWriteTime.dwHighDateTime) << 32) + bhfi.ftLastWriteTime.dwLowDateTime; buf->st_mtime = nttime_to_unixtime(&timetemp); timetemp = ((uint64_t)(bhfi.ftLastAccessTime.dwHighDateTime) << 32) + bhfi.ftLastAccessTime.dwLowDateTime; buf->st_atime = nttime_to_unixtime(&timetemp); buf->st_dev = (uint32_t)bhfi.dwVolumeSerialNumber; buf->st_nlink = (uint32_t)bhfi.nNumberOfLinks; buf->st_mode = (uint32_t)bhfi.dwFileAttributes; CloseHandle(hFile); return 0; failure: CloseHandle(hFile); return -1; failure2: CloseHandle(hFile); return -2; } jdupes-1.18.1/win_stat.h000066400000000000000000000026721370142704600151100ustar00rootroot00000000000000/* Windows-native routines for getting stat()-like information * See win_stat.c for license information */ #ifndef WIN_STAT_H #define WIN_STAT_H #ifdef __cplusplus extern "C" { #endif #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MAN #endif #include #include struct winstat { uint64_t st_ino; int64_t st_size; uint32_t st_dev; uint32_t st_nlink; uint32_t st_mode; time_t st_ctime; time_t st_mtime; time_t st_atime; }; /* stat() macros for Windows "mode" flags (file attributes) */ #define 
/* Get stat()-like extra information for a file on Windows */ int win_stat(const char * const filename, struct winstat * const restrict buf) { HANDLE hFile; BY_HANDLE_FILE_INFORMATION bhfi; uint64_t timetemp; #ifdef UNICODE static wchar_t wname2[WPATH_MAX]; if (!buf) return -127; if (!M2W(filename,wname2)) return -126; hFile = CreateFileW(wname2, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); #else if (!buf) return -127; hFile = CreateFile(filename, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); #endif if (hFile == INVALID_HANDLE_VALUE) goto failure; if (!GetFileInformationByHandle(hFile, &bhfi)) goto failure2; buf->st_ino = ((uint64_t)(bhfi.nFileIndexHigh) << 32) + (uint64_t)bhfi.nFileIndexLow; buf->st_size = ((int64_t)(bhfi.nFileSizeHigh) << 32) + (int64_t)bhfi.nFileSizeLow; timetemp = ((uint64_t)(bhfi.ftCreationTime.dwHighDateTime) << 32) + bhfi.ftCreationTime.dwLowDateTime; buf->st_ctime = nttime_to_unixtime(&timetemp); timetemp = ((uint64_t)(bhfi.ftLastWriteTime.dwHighDateTime) << 32) + bhfi.ftLastWriteTime.dwLowDateTime; buf->st_mtime = nttime_to_unixtime(&timetemp); timetemp = ((uint64_t)(bhfi.ftLastAccessTime.dwHighDateTime) << 32) + bhfi.ftLastAccessTime.dwLowDateTime; buf->st_atime = nttime_to_unixtime(&timetemp); buf->st_dev = (uint32_t)bhfi.dwVolumeSerialNumber; buf->st_nlink = (uint32_t)bhfi.nNumberOfLinks; buf->st_mode = (uint32_t)bhfi.dwFileAttributes; CloseHandle(hFile); return 0; failure: return -1; failure2: CloseHandle(hFile); return -2; } jdupes-1.18.1/win_stat.h000066400000000000000000000026721370142704600151060ustar00rootroot00000000000000/* Windows-native routines for getting stat()-like information * See win_stat.c for license information */ #ifndef WIN_STAT_H #define WIN_STAT_H #ifdef __cplusplus extern "C" { #endif #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include <windows.h> #include <stdint.h> struct winstat { uint64_t st_ino; int64_t st_size; uint32_t st_dev; uint32_t st_nlink; uint32_t st_mode; time_t st_ctime; time_t st_mtime; time_t st_atime; }; /* stat() macros for Windows "mode" flags (file attributes) */ #define S_ISARCHIVE(st_mode) ((st_mode & FILE_ATTRIBUTE_ARCHIVE) ? 1 : 0) #define S_ISRO(st_mode) ((st_mode & FILE_ATTRIBUTE_READONLY) ? 1 : 0) #define S_ISHIDDEN(st_mode) ((st_mode & FILE_ATTRIBUTE_HIDDEN) ? 1 : 0) #define S_ISSYSTEM(st_mode) ((st_mode & FILE_ATTRIBUTE_SYSTEM) ? 1 : 0) #define S_ISCRYPT(st_mode) ((st_mode & FILE_ATTRIBUTE_ENCRYPTED) ? 1 : 0) #define S_ISDIR(st_mode) ((st_mode & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0) #define S_ISCOMPR(st_mode) ((st_mode & FILE_ATTRIBUTE_COMPRESSED) ? 1 : 0) #define S_ISREPARSE(st_mode) ((st_mode & FILE_ATTRIBUTE_REPARSE_POINT) ? 1 : 0) #define S_ISSPARSE(st_mode) ((st_mode & FILE_ATTRIBUTE_SPARSE_FILE) ? 1 : 0) #define S_ISTEMP(st_mode) ((st_mode & FILE_ATTRIBUTE_TEMPORARY) ? 1 : 0) #define S_ISREG(st_mode) ((st_mode & FILE_ATTRIBUTE_DIRECTORY) ? 0 : 1) extern int win_stat(const char * const filename, struct winstat * const restrict buf); #ifdef __cplusplus } #endif #endif /* WIN_STAT_H */
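/* Usage sketch (editor's illustration, not part of the original header):
 * win_stat() returns 0 on success and a negative value on failure, so a
 * caller might test a path and its attribute bits like this:
 *
 *   struct winstat ws;
 *   if (win_stat("C:\\some\\file.txt", &ws) == 0 && !S_ISDIR(ws.st_mode))
 *     printf("%lld bytes, mtime %lld\n",
 *            (long long)ws.st_size, (long long)ws.st_mtime);
 *
 * The path shown is hypothetical. */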
jdupes-1.18.1/winres.manifest.xml000066400000000000000000000006751370142704600167460ustar00rootroot00000000000000<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0" xmlns:asmv3="urn:schemas-microsoft-com:asm.v3"> <asmv3:application> <asmv3:windowsSettings xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings"> <ws2:longPathAware>true</ws2:longPathAware> </asmv3:windowsSettings> </asmv3:application> </assembly> jdupes-1.18.1/winres.rc000066400000000000000000000015321370142704600147360ustar00rootroot00000000000000#include "winver.h" 1 24 winres.manifest.xml VS_VERSION_INFO VERSIONINFO FILEVERSION 1,18,1,0 PRODUCTVERSION 1,18,1,0 FILEFLAGSMASK 0x3fL FILEFLAGS 0x0L FILEOS 0x40004L FILETYPE 0x1L FILESUBTYPE 0x0L BEGIN BLOCK "StringFileInfo" BEGIN BLOCK "040904b0" BEGIN VALUE "Comments", "(C) 2015-2020 Jody Bruchon and contributors, published under The MIT License" VALUE "CompanyName", "Jody Bruchon" VALUE "FileDescription", "jdupes Duplicate File Finder Tool" VALUE "FileVersion", "1,18,1,0" VALUE "InternalName", "jdupes" VALUE "LegalCopyright", "(C) 2015-2020 Jody Bruchon and contributors" VALUE "OriginalFilename", "jdupes.exe" VALUE "ProductName", "jdupes" VALUE "ProductVersion", "1,18,1,0" END END BLOCK "VarFileInfo" BEGIN VALUE "Translation", 0x409, 1200 END END jdupes-1.18.1/xxhash.c000066400000000000000000000725651370142704600145540ustar00rootroot00000000000000/* * xxHash - Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You can contact the author at : * - xxHash homepage: http://www.xxhash.com * - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* ************************************* * Tuning parameters ***************************************/ /*!XXH_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The switch below allows selecting a different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. * Method 2 : direct access. This method doesn't depend on the compiler but violates the C standard. * It can generate buggy code on targets which do not support unaligned memory accesses. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See http://stackoverflow.com/a/32095106/646947 for details. * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define XXH_FORCE_MEMORY_ACCESS 2 # elif defined(__INTEL_COMPILER) || \ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ || defined(__ARM_ARCH_7S__) )) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif /*!XXH_ACCEPT_NULL_INPUT_POINTER : * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. * When this macro is enabled, xxHash actively checks input for null pointer. * If it is, the result for null input pointers is the same as for a zero-length input. */ #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ # define XXH_ACCEPT_NULL_INPUT_POINTER 0 #endif /*!XXH_FORCE_NATIVE_FORMAT : * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. * Should endian-independence be of no importance for your application, you may set the #define below to 1, * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ # define XXH_FORCE_NATIVE_FORMAT 0 #endif /*!XXH_FORCE_ALIGN_CHECK : * This is a minor performance trick, only useful with lots of very small keys. * It means : check for aligned/unaligned input. * The check costs one initial branch per hash; * set it to 0 when the input is guaranteed to be aligned, * or when alignment doesn't matter for performance. */ #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) # define XXH_FORCE_ALIGN_CHECK 0 # else # define XXH_FORCE_ALIGN_CHECK 1 # endif #endif /* ************************************* * Includes & Memory related functions ***************************************/ /*! 
Modify the local functions below should you wish to use some other memory routines * for malloc(), free() */ #include <stdlib.h> static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } /*! and for memcpy() */ #include <string.h> static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } #define XXH_STATIC_LINKING_ONLY #include "xxhash.h" /* ************************************* * Compiler Specific Options ***************************************/ #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # define FORCE_INLINE static __forceinline #else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else # define FORCE_INLINE static inline # endif # else # define FORCE_INLINE static # endif /* __STDC_VERSION__ */ #endif /* ************************************* * Basic Types ***************************************/ #ifndef MEM_MODULE # if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; # else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; } __attribute__((packed)) unalign; static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } #else /* portable and safe solution. Generally efficient. 
* see : http://stackoverflow.com/a/32095106/646947 */ static U32 XXH_read32(const void* memPtr) { U32 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* **************************************** * Compiler-specific Functions and Macros ******************************************/ #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong #elif XXH_GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 #else static U32 XXH_swap32 (U32 x) { return ((x << 24) & 0xff000000 ) | ((x << 8) & 0x00ff0000 ) | ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } #endif /* ************************************* * Architecture Macros ***************************************/ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN static const int g_one = 1; # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) #endif /* *************************** * Memory reads *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); else return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); } FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } static U32 XXH_readBE32(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } /* ************************************* * Macros ***************************************/ #define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* * 32-bits hash functions *********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; static const U32 PRIME32_3 = 3266489917U; static const U32 PRIME32_4 = 668265263U; static const U32 PRIME32_5 = 374761393U; static U32 XXH32_round(U32 seed, U32 input) { seed += input * PRIME32_2; seed = XXH_rotl32(seed, 13); seed *= PRIME32_1; return seed; } FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U32 h32; #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif if (len>=16) { const BYTE* const limit = bEnd - 16; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; U32 v4 = seed - PRIME32_1; do { v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; } while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (U32) len; while (p+4<=bEnd) { h32 += XXH_get32bits(p) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } while (p<bEnd) { h32 += (*p) * PRIME32_5; h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH32_state_t state; XXH32_reset(&state, seed); XXH32_update(&state, input, len); return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif }
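/* Usage sketch (editor's illustration, not part of the original source):
 * one-shot hashing of an in-memory buffer with XXH32(). The same input and
 * seed yield the same 32-bit value on any platform. Guarded by the made-up
 * macro XXH_USAGE_EXAMPLE so it is never compiled into jdupes: */
#ifdef XXH_USAGE_EXAMPLE
#include <stdio.h>
static void XXH32_oneshot_example(void)
{
  static const char buf[] = "data to hash";
  XXH32_hash_t const h = XXH32(buf, sizeof(buf) - 1, 0); /* seed = 0 */
  printf("XXH32 = %08x\n", (unsigned)h);
}
#endif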
/*====== Hash streaming ======*/ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif state->total_len_32 += (unsigned)len; state->large_len |= (unsigned)(len >= 16) | (state->total_len_32>=16); if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (unsigned)len; return XXH_OK; } if (state->memsize) { /* some data left from previous update */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); { const U32* p32 = state->mem32; state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); } p += 16-state->memsize; state->memsize = 0; } if (p <= bEnd-16) { const BYTE* const limit = bEnd - 16; U32 v1 = state->v1; U32 v2 = state->v2; U32 v3 = state->v3; U32 v4 = state->v4; do { v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_update_endian(state_in, input, len, XXH_littleEndian); else return XXH32_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem32; const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; if (state->large_len) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; while (p+4<=bEnd) { h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } while (p<bEnd) { h32 += (*p) * PRIME32_5; h32 = XXH_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH32_digest_endian(state_in, XXH_littleEndian); else return XXH32_digest_endian(state_in, XXH_bigEndian); }
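/* Usage sketch (editor's illustration, not part of the original source):
 * the streaming calls above produce the same digest as one XXH32() call on
 * the concatenated input, which is useful when data arrives in chunks.
 * Guarded by the made-up macro XXH_USAGE_EXAMPLE: */
#ifdef XXH_USAGE_EXAMPLE
static XXH32_hash_t XXH32_streaming_example(const void* part1, size_t len1,
                                            const void* part2, size_t len2)
{
  XXH32_state_t* const state = XXH32_createState();
  XXH32_hash_t h = 0;
  if (state != NULL
      && XXH32_reset(state, 0) == XXH_OK  /* seed = 0 */
      && XXH32_update(state, part1, len1) == XXH_OK
      && XXH32_update(state, part2, len2) == XXH_OK)
    h = XXH32_digest(state);
  XXH32_freeState(state); /* free(NULL) is safe if creation failed */
  return h;
}
#endif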
/*====== Canonical representation ======*/ /*! Default XXH result types are basic unsigned 32 and 64 bits. * The canonical representation follows human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file or buffer, remaining comparable across different systems. */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) { return XXH_readBE32(src); } #ifndef XXH_NO_LONG_LONG /* ******************************************************************* * 64-bits hash functions *********************************************************************/ /*====== Memory access ======*/ #ifndef MEM_MODULE # define MEM_MODULE # if !defined (__VMS) \ && (defined (__cplusplus) \ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include <stdint.h> typedef uint64_t U64; # else /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ typedef unsigned long long U64; # endif #endif #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } #else /* portable and safe solution. Generally efficient. * see : http://stackoverflow.com/a/32095106/646947 */ static U64 XXH_read64(const void* memPtr) { U64 val; memcpy(&val, memPtr, sizeof(val)); return val; } #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap64 _byteswap_uint64 #elif XXH_GCC_VERSION >= 403 # define XXH_swap64 __builtin_bswap64 #else static U64 XXH_swap64 (U64 x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } #endif FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); else return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); } FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } static U64 XXH_readBE64(const void* ptr) { return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } /*====== xxh64 ======*/ static const U64 PRIME64_1 = 11400714785074694791ULL; static const U64 PRIME64_2 = 14029467366897019727ULL; static const U64 PRIME64_3 = 1609587929392839161ULL; static const U64 PRIME64_4 = 9650029242287828579ULL; static const U64 PRIME64_5 = 2870177450012600261ULL; static U64 XXH64_round(U64 acc, U64 input) { acc += input * PRIME64_2; acc = XXH_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static U64 XXH64_mergeRound(U64 acc, U64 val) { val = XXH64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return acc; } FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; const BYTE* bEnd = p + len; U64 h64; #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } #endif if (len>=32) { const BYTE* const limit = bEnd - 32; U64 v1 = seed + PRIME64_1 + PRIME64_2; U64 v2 = seed + PRIME64_2; U64 v3 = seed + 0; U64 v4 = seed - PRIME64_1; do { v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; } while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (U64) len; while (p+8<=bEnd) { U64 const k1 = XXH64_round(0, XXH_get64bits(p)); h64 ^= k1; h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } if (p+4<=bEnd) { h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p<bEnd) { h64 ^= (*p) * PRIME64_5; h64 = XXH_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ XXH64_state_t state; XXH64_reset(&state, seed); XXH64_update(&state, input, len); return XXH64_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); } } if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); else return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); #endif } /*====== Hash Streaming ======*/ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) { memcpy(dstState, srcState, sizeof(*dstState)); } XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { XXH64_state_t state; /* using a local state to memcpy() in order to avoid 
strict-aliasing warnings */ memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; /* do not write into reserved, planned to be removed in a future version */ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); return XXH_OK; } FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) return XXH_OK; #else return XXH_ERROR; #endif state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } if (state->memsize) { /* tmp buffer is full */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); p += 32-state->memsize; state->memsize = 0; } if (p+32 <= bEnd) { const BYTE* const limit = bEnd - 32; U64 v1 = state->v1; U64 v2 = state->v2; U64 v3 = state->v3; U64 v4 = state->v4; do { v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; } while (p<=limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < bEnd) { XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); state->memsize = (unsigned)(bEnd-p); } return XXH_OK; } XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_update_endian(state_in, input, len, XXH_littleEndian); else return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem64; const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; if (state->total_len >= 32) { U64 const v1 = state->v1; U64 const v2 = state->v2; U64 const v3 = state->v3; U64 const v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); h64 = XXH64_mergeRound(h64, v1); h64 = XXH64_mergeRound(h64, v2); h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { h64 = state->v3 + PRIME64_5; } h64 += (U64) state->total_len; while (p+8<=bEnd) { U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); h64 ^= k1; h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } if (p+4<=bEnd) { h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } while (p<bEnd) { h64 ^= (*p) * PRIME64_5; h64 = XXH_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) return XXH64_digest_endian(state_in, 
XXH_littleEndian); else return XXH64_digest_endian(state_in, XXH_bigEndian); } /*====== Canonical representation ======*/ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); memcpy(dst, &hash, sizeof(*dst)); } XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { return XXH_readBE64(src); } #endif /* XXH_NO_LONG_LONG */ jdupes-1.18.1/xxhash.h000066400000000000000000000304111370142704600145530ustar00rootroot00000000000000/* xxHash - Extremely Fast Hash algorithm Header File Copyright (C) 2012-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - xxHash source repository : https://github.com/Cyan4973/xxHash */ /* Notice extracted from xxHash homepage : xxHash is an extremely fast Hash algorithm, running at RAM speed limits. It also successfully passes all tests from the SMHasher suite. Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) Name Speed Q.Score Author xxHash 5.4 GB/s 10 CrapWow 3.2 GB/s 2 Andrew MurmurHash 3a 2.7 GB/s 10 Austin Appleby SpookyHash 2.0 GB/s 10 Bob Jenkins SBox 1.4 GB/s 9 Bret Mulvey Lookup3 1.2 GB/s 9 Bob Jenkins SuperFastHash 1.2 GB/s 1 Paul Hsieh CityHash64 1.05 GB/s 10 Pike & Alakuijala FNV 0.55 GB/s 5 Fowler, Noll, Vo CRC32 0.43 GB/s 9 MD5-32 0.33 GB/s 10 Ronald L. Rivest SHA1-32 0.28 GB/s 10 Q.Score is a measure of quality of the hash function. It depends on successfully passing SMHasher test set. 10 is a perfect score. A 64-bits version, named XXH64, is available since r35. It offers much better speed, but for 64-bits applications only. 
Name Speed on 64 bits Speed on 32 bits XXH64 13.8 GB/s 1.9 GB/s XXH32 6.8 GB/s 6.0 GB/s */ #ifndef XXHASH_H_5627135585666179 #define XXHASH_H_5627135585666179 1 #if defined (__cplusplus) extern "C" { #endif /* **************************** * Definitions ******************************/ #include <stddef.h> /* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /* **************************** * API modifier ******************************/ /** XXH_PRIVATE_API * This is useful to include xxhash functions in `static` mode * in order to inline them, and remove their symbol from the public list. * Methodology : * #define XXH_PRIVATE_API * #include "xxhash.h" * `xxhash.c` is automatically included. * It's not useful to compile and link it as a separate module. */ #ifdef XXH_PRIVATE_API # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif # if defined(__GNUC__) # define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline # elif defined(_MSC_VER) # define XXH_PUBLIC_API static __inline # else /* this version may generate warnings for unused static functions */ # define XXH_PUBLIC_API static # endif #else # define XXH_PUBLIC_API /* do nothing */ #endif /* XXH_PRIVATE_API */ /*!XXH_NAMESPACE, aka Namespace Emulation : If you want to include _and expose_ xxHash functions from within your own library, but also want to avoid symbol collisions with other libraries which may also include xxHash, you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). Note that no change is required within the calling program as long as it includes `xxhash.h` : regular symbol name will be automatically translated by this header. 
*/ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) #endif /* ************************************* * Version ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 #define XXH_VERSION_RELEASE 3 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); /*-********************************************************************** * 32-bits hash ************************************************************************/ typedef unsigned int XXH32_hash_t; /*! XXH32() : Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); /*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); /* These functions generate the xxHash of an input provided in multiple segments. Note that, for small input, they are slower than single-call functions, due to state management. For small input, prefer `XXH32()` and `XXH64()` . XXH state must first be allocated, using XXH*_createState() . Start a new hash by initializing state with a seed, using XXH*_reset(). Then, feed the hash state by calling XXH*_update() as many times as necessary. Obviously, input must be allocated and read accessible. The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. Finally, a hash value can be produced anytime, by using XXH*_digest(). 
This function returns the nn-bits hash as an int or long long. It's still possible to continue inserting input into the hash state after a digest, and generate some new hashes later on, by calling again XXH*_digest(). When done, free XXH state space if it was allocated dynamically. */ /*====== Canonical representation ======*/ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); /* Default result types for XXH functions are primitive unsigned 32 and 64 bits. * The canonical representation uses human-readable write convention, aka big-endian (large digits first). * These functions allow transformation of hash result into and from its canonical format. * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. */ #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bits hash ************************************************************************/ typedef unsigned long long XXH64_hash_t; /*! XXH64() : Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". "seed" can be used to alter the result predictably. This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). */ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); /*====== Streaming ======*/ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); /*====== Canonical representation ======*/ typedef struct { unsigned char digest[8]; } XXH64_canonical_t; XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); #endif /* XXH_NO_LONG_LONG */ #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ This section contains declarations which are not guaranteed to remain stable. They may change in future versions, becoming incompatible with a different version of the library. These declarations should only be used with static linking. Never use them in association with dynamic linking! =================================================================================================== */ /* These definitions are only meant to make possible static allocation of XXH state, on stack or in a struct for example. Never use members directly. 
*/ struct XXH32_state_s { unsigned total_len_32; unsigned large_len; unsigned v1; unsigned v2; unsigned v3; unsigned v4; unsigned mem32[4]; /* buffer defined as U32 for alignment */ unsigned memsize; unsigned reserved; /* never read nor write, will be removed in a future version */ }; /* typedef'd to XXH32_state_t */ #ifndef XXH_NO_LONG_LONG /* remove 64-bits support */ struct XXH64_state_s { unsigned long long total_len; unsigned long long v1; unsigned long long v2; unsigned long long v3; unsigned long long v4; unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ unsigned memsize; unsigned reserved[2]; /* never read nor write, will be removed in a future version */ }; /* typedef'd to XXH64_state_t */ #endif #ifdef XXH_PRIVATE_API # include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ #endif #endif /* XXH_STATIC_LINKING_ONLY */ #if defined (__cplusplus) } #endif #endif /* XXHASH_H_5627135585666179 */
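/* Usage sketch (editor's illustration, not part of the original header):
 * the canonical (big-endian) form above gives a byte sequence that compares
 * equal across platforms, and XXH64_hashFromCanonical() recovers the native
 * value. Guarded by the made-up macro XXH_USAGE_EXAMPLE: */
#ifdef XXH_USAGE_EXAMPLE
static int XXH64_canonical_roundtrip(XXH64_hash_t h)
{
  XXH64_canonical_t c;
  XXH64_canonicalFromHash(&c, h);          /* serialize: 8 big-endian bytes */
  return XXH64_hashFromCanonical(&c) == h; /* round-trip: always 1 */
}
#endif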