Last Updated: February 25, 2016
· daraff

command line magic examples with grep / cut / awk / sort

Here are some command line examples that I've used over time. These are personal notes, but maybe you can profit from them too.

# Output a sorted file list based on the contents of a gzipped tarball:
# 1. tar -tzf: list the archive members. (less only prints an archive listing
#    when a lesspipe input preprocessor is configured; without one it emits the
#    raw compressed bytes, so tar is the reliable choice in a pipeline.)
# 2. grep -o: keep only the part of each line that matches the regex
# 3. cut: split the string at every "/" occurrence and take the second field (-f2)
# 4. awk: split the file name on "." and "-" and reorder it to
#    "<field 3>;_1.<field 2>;<whole file name>"
# 5. sort -nr: numerical sort in reverse order
# 6. put the result in a file
tar -tzf nzz-200k-article-export-2014-02-19.tar.gz | grep -o -e "200k-articles/.*" | cut -d / -f2 | awk -F"[.-]" '{ print $3 ";_1." $2 ";" $0 }' | sort -nr > test.txt
# Find all regular files in a dir and its sub dirs and copy them into another directory.
# find -exec hands every path to cp as one single argument, so file names that
# contain spaces, quotes, "$" or backticks are copied correctly. (Splicing {}
# into an `sh -c '...'` string lets the shell re-interpret such names.)
# "--" stops cp from treating a path as an option.
find /dirWithALotOfSubDirs/ -type f -exec cp -- {} /dirWithJustFiles \;
# Filter a list of XML files down to the articles that have body content:
# 1. sed: take entries 15-100 from the list
# 2. cut: split each line at ";" and take the 4th field
# 3. xargs -I{}: run one grep per entry on "xmlFolder/<entry>" and print the file
#    name (-l) if the file contains FormalName="Article"
#    (the path prefix has to be adapted to where the files really are)
# 4. xargs: grep the remaining file list and print the names of the files that
#    contain "body.content". No {} here: without -I, xargs appends the file
#    names by itself and a literal {} would be passed to grep as a file name.
# 5. cut: split the path at "/" and take the 6th component
# 6. put the result in a file
sed -n '15,100p' list.txt | cut -d ";" -f4 | xargs -I{} grep -El 'FormalName="Article"' "xmlFolder/{}" | xargs grep -El "body\.content" | cut -d "/" -f6 > output.txt
# Do some HTTP requests based on a file with a list of article ids:
# 1. awk: take the first column of every line of the file article-ids
# 2. while: loop over the list; for every id print a header line, pass the id
#    to the curl request, and append both header and response to response.log
#    with the tee command (tee also keeps them visible on stdout)
#    - IFS= read -r: take the line as is, backslashes are not interpreted
#    - printf '%s': print the id verbatim (echo -e would also expand escape
#      sequences that happen to be inside the id)
# 3. sleep: wait one second after every request
awk '{ print $1 }' article-ids | while IFS= read -r line; do printf '\n\narticle-id: %s\n' "$line" | tee -a response.log; curl -s -u 'user:password' "$line" | tee -a response.log; sleep 1; done
# Write to result.txt the lines that are in allXmls.txt but not in xmlsToBeFilteres.txt.
# Attention: comm expects both inputs to be lexicographically sorted (default
# sort order). Numerically sorted files have to go through sort first, which is
# why both files are sorted on the fly here.
# -2 hides the lines unique to the second file, -3 hides the lines found in both.
comm -2 -3 \
  <(sort allXmls.txt) \
  <(sort xmlsToBeFilteres.txt) \
  > result.txt
# List the directory with one entry per line (-1), in reverse order (-r)
ls -1 -r
# Search and replace in all files of a directory tree:
# 1. find all regular files below nzzs20140223-moved-articles
# 2. in each file replace every occurrence of
#      <Text Ressort="me" RessortLangtext="  Meinungen"
#    with
#      <Text Ressort="hg" RessortLangtext="  Hintergrund"
# 3. and write the change back to the file -> -i setting
#    (GNU sed syntax; BSD/macOS sed needs -i '')
# find -exec ... {} + passes the file names directly to sed, many per call, so
# names with spaces or quotes work and sed is not started when nothing is found.
find nzzs20140223-moved-articles -type f -exec sed -i 's/<Text Ressort="me" RessortLangtext="  Meinungen"/<Text Ressort="hg" RessortLangtext="  Hintergrund"/g' {} +
# Delete a line based on a sed regex in all *.json files:
# 1. find all regular files named *.json below the current directory
#    (-type f skips directories that happen to match the name)
# 2. search in all files found for lines containing "isNewsnt": false
# 3. and delete those lines in place -> -i setting
#    (GNU sed syntax; BSD/macOS sed needs -i '')
# find -exec ... {} + passes the file names directly to sed, many per call, so
# names with spaces or quotes work and no sed process per file is needed.
find ./ -type f -name '*.json' -exec sed -i '/"isNewsnt": false/d' {} +