From cdd986acf8d8575920edf3c4c5351dbc561a8e64 Mon Sep 17 00:00:00 2001
From: "G. Richardson" <grichardson@kivitec.de>
Date: Thu, 14 Feb 2019 12:01:10 +0100
Subject: [PATCH] =?utf8?q?Inventory=20Controller=20-=20Datenbankoptimierun?=
 =?utf8?q?gen=20f=C3=BCr=20mini=5Fjournal?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Aus Datenbanksicht war das Inventory mini-journal eine Katastrophe.

Die trans_id Abfrage fÃ¼hrte zu einem ersten Seq Scan auf der Tabelle inventory.
  my $query = 'SELECT trans_id FROM inventory GROUP BY trans_id ORDER BY max(itime) DESC LIMIT 10';

Die Rose Manager Abfrage fÃ¼hrte dann zu einem zweiten Seq Scan auf der Tabelle inventory:
 $objs = SL::DB::Manager::Inventory->get_all(query => [ trans_id => \@ids ]) if @ids;

Das sind zwei Seq Scans auf eine Tabelle die in kivitendo recht groÃ werden
kann. AuÃerdem kÃ¶nnte in der Zwischenzeit ein neuer inventory-Eintrag
dazugekommen sein, der damit ignoriert wÃ¼rde.

Im Template wurde dann auf die Rose-Objekte von part, transfer_type, bin und
warehouse zugegriffen, und da diese noch nicht geladen wurden sorgt das im
Extremfall fÃ¼r 40 weitere Datenbankzugriffe.

Das ist alles schÃ¶n kompakter perl Code, aber wie kÃ¶nnte man das aus
Datenbanksicht optimieren, also die Anzahl der Zugriffe verringern und nach
MÃ¶glichkeit Indexe benutzen?

Die Templatezugriffe kÃ¶nnen einfach durch ein with_objects verhindert werden:
    with_objects => [ 'parts', 'trans_type', 'bin', 'bin.warehouse' ],

Statt einer separaten Abfrage fÃ¼r die trans_ids kÃ¶nnte man diese als eine
Unterabfrage in get_all einfÃ¼hren:
  query        => [ trans_id => [ \"$query" ] ]

Die get-all-Abfrage kann man aber noch weiter verbessern, indem man nach id
statt trans_id filtert, da es fÃ¼r id im Gegensatz zu trans_id schon einen Index
gibt. Das Ziel fÃ¼r die Unterabfrage sollte also sein, eine Liste von ids zu
bekommen, damit der Index benutzt wird und man sich den Seq Scan spart.

Das mini-Journal zeigt die letzten 10 Lagerbewegungen, die entweder
Einlagerungen, Auslagerungen oder Umlagerungen sein kÃ¶nnen. Im Fall von
Umlagerungen wÃ¤ren das 2 inventory-EintrÃ¤ge, ansonsten 1. Da wir nicht wissen,
wieviele Umlagerungen dabei sind, holen wir die letzten 20 EintrÃ¤ge, filtern
diese nach den letzten 10 trans_ids, und extrahieren daraus die inventory ids
(zwischen 10 und 20 ids). Die ursprÃ¼ngliche Abfrage mit dem GROUP BY konnte
keinen index nutzen, da das ORDER BY auf max(itime) statt itime war. Durch das
"limit 20" werden zwar potentiell ein paar Zeilen zu viel geholt, dafÃ¼r kann
man aber nun einen Index auf inventory(itime) setzen, der von der Abfrage auch
verwendet werden kann, und damit spart man sich auch den letzten Seq Scan auf
inventory.

create index if not exists inventory_itime_idx on inventory (itime);
---
 SL/Controller/Inventory.pm | 52 +++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/SL/Controller/Inventory.pm b/SL/Controller/Inventory.pm
index f20f6aee8..479c9ad22 100644
--- a/SL/Controller/Inventory.pm
+++ b/SL/Controller/Inventory.pm
@@ -20,6 +20,7 @@ use SL::DBUtils;
 use SL::Helper::Flash;
 use SL::Controller::Helper::ReportGenerator;
 use SL::Controller::Helper::GetModels;
+use List::MoreUtils qw(uniq);
 
 use English qw(-no_match_vars);
 
@@ -781,22 +782,55 @@ sub build_unit_select {
 sub mini_journal {
   my ($self) = @_;
 
-  # get last 10 transaction ids
-  my $query = 'SELECT trans_id, max(itime) FROM inventory GROUP BY trans_id ORDER BY max(itime) DESC LIMIT 10';
-  my @ids = selectall_array_query($::form, $::form->get_standard_dbh, $query);
+  # We want to fetch the last 10 inventory events (inventory rows with the same trans_id)
+  # To prevent a Seq Scan on inventory set an index on inventory.itime
+  # Each event may have one (transfer_in/out) or two (transfer) inventory rows
+  # So fetch the last 20, group by trans_id, limit to the last 10 trans_ids,
+  # and then extract the inventory ids from those 10 trans_ids
+  # By querying Inventory->get_all via the id instead of trans_id we can make
+  # use of the existing index on id
 
-  my $objs;
-  $objs = SL::DB::Manager::Inventory->get_all(query => [ trans_id => \@ids ]) if @ids;
+  # inventory ids of the most recent 10 inventory trans_ids
+  my $query = <<SQL;
+with last_inventories as (
+   select id,
+          trans_id,
+          itime
+     from inventory
+ order by itime desc
+    limit 20
+),
+grouped_ids as (
+   select trans_id,
+          array_agg(id) as ids
+     from last_inventories
+ group by trans_id
+ order by max(itime)
+     desc limit 10
+)
+select unnest(ids)
+  from grouped_ids
+ limit 20  -- so the planner knows how many ids to expect, the cte is an optimisation fence
+SQL
 
-  # at most 2 of them belong to a transaction and the qty determins in or out.
-  # sort them for display
+  my $objs  = SL::DB::Manager::Inventory->get_all(
+    query        => [ id => [ \"$query" ] ],
+    with_objects => [ 'parts', 'trans_type', 'bin', 'bin.warehouse' ], # prevent lazy loading in template
+    sort_by      => 'itime DESC',
+  );
+  # remember order of trans_ids from query, for ordering hash later
+  my @sorted_trans_ids = uniq map { $_->trans_id } @$objs;
+
+  # at most 2 of them belong to a transaction and the qty determines in or out.
   my %transactions;
   for (@$objs) {
     $transactions{ $_->trans_id }{ $_->qty > 0 ? 'in' : 'out' } = $_;
     $transactions{ $_->trans_id }{base} = $_;
   }
-  # and get them into order again
-  my @sorted = map { $transactions{$_} } @ids;
+
+  # because the inventory transactions were built in a hash, we need to sort the
+  # hash by using the original sort order of the trans_ids
+  my @sorted = map { $transactions{$_} } @sorted_trans_ids;
 
   return \@sorted;
 }
-- 
2.20.1